loomvale-image-lab

Starting on Zero

App Files Files Community

Theloomvale committed 25 days ago

Commit

ef904ea

verified ·

1 Parent(s): e6939d7

Update app.py

Browse files

Files changed (1) hide show

app.py +308 -183

app.py CHANGED Viewed

@@ -1,232 +1,357 @@
-# app.py
-# Loomvale Image Lab – SDXL prompt runner (fits n8n + Google Sheet pipeline)
-#
-# Inputs match the order used in the Gradio /api/predict endpoint:
-#  0) model_key
-#  1) prompt
-#  2) negative
-#  3) width
-#  4) height
-#  5) steps
-#  6) guidance
-#  7) images_per_prompt
-#  8) seed (or None/-1 for random)
-#  9) use_lcm (bool)
 import os
-from functools import lru_cache
-from typing import List, Optional
 import gradio as gr
 from PIL import Image
-import torch
 from diffusers import (
     StableDiffusionXLPipeline,
-    DPMSolverMultistepScheduler,
-    EulerAncestralDiscreteScheduler,
     LCMScheduler,
 )
-SPACE_TITLE = "Loomvale Image Lab"
-DEFAULT_NEGATIVE = (
-    "text, watermark, signature, logo, jpeg artifacts, lowres, blurry, oversharp, "
-    "deformed, extra fingers, extra limbs, bad hands, bad anatomy, duplicate, worst quality"
-)
-# ---- Models ---------------------------------------------------------------
-MODEL_MAP = {
-    # Default – painterly / versatile (anime+semi-real)
-    "SDXL Base 1.0 (stabilityai/stable-diffusion-xl-base-1.0)": {
-        "repo": "stabilityai/stable-diffusion-xl-base-1.0",
-        "variant": "fp16",
-        "scheduler": "dpmpp",
-        "hint": "Use steps ~24–36, guidance 5.5–7.5",
-    },
-    # Very fast drafts
-    "SDXL Turbo (stabilityai/sdxl-turbo)": {
-        "repo": "stabilityai/sdxl-turbo",
-        "variant": "fp16",
-        "scheduler": "euler_a",
-        "hint": "Use steps 1–4, guidance 0.5–2.0",
-    },
-    # Photoreal leaning XL (popular community model)
-    "Realistic Vision XL (photoreal)": {
-        "repo": "SG161222/RealVisXL_V4.0",
-        "variant": "fp16",
-        "scheduler": "dpmpp",
-        "hint": "Use steps ~25–40, guidance 4.5–7.0",
-    },
-}
-def _device_dtype():
-    if torch.cuda.is_available():
-        return "cuda", torch.float16
-    elif torch.backends.mps.is_available():
-        return "mps", torch.float16
-    return "cpu", torch.float32
-DEVICE, DTYPE = _device_dtype()
-@lru_cache(maxsize=3)
-def load_pipeline(model_key: str) -> StableDiffusionXLPipeline:
-    spec = MODEL_MAP[model_key]
-    repo_id = spec["repo"]
-    variant = spec.get("variant", None)
-    pipe = StableDiffusionXLPipeline.from_pretrained(
-        repo_id,
-        torch_dtype=DTYPE,
-        use_safetensors=True,
-        add_watermarker=False,
-        variant=variant,
-    )
-    # default scheduler
-    sched = spec.get("scheduler", "dpmpp")
-    if sched == "dpmpp":
-        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-    elif sched == "euler_a":
-        pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
-    pipe = pipe.to(DEVICE)
-    return pipe
-def apply_lcm(pipe: StableDiffusionXLPipeline) -> StableDiffusionXLPipeline:
-    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
     return pipe
-def _seed_to_generator(seed: Optional[int]) -> Optional[torch.Generator]:
-    if seed is None or seed == -1:
-        return None
-    g = torch.Generator(device=DEVICE)
-    g.manual_seed(int(seed))
-    return g
-def run_infer(
-    model_key: str,
-    prompt: str,
-    negative: str,
     width: int,
     height: int,
     steps: int,
-    guidance: float,
-    images_per_prompt: int,
-    seed: Optional[int],
-    use_lcm: bool,
 ) -> List[Image.Image]:
-    assert width % 64 == 0 and height % 64 == 0, "Width/Height must be divisible by 64"
-    pipe = load_pipeline(model_key)
-    pipe.set_progress_bar_config(disable=True)
-    # LCM toggle
-    pipe_to_use = apply_lcm(pipe) if use_lcm else pipe
-    # Turbo best practice: very low guidance, very few steps
-    if "sdxl-turbo" in MODEL_MAP[model_key]["repo"]:
-        guidance = max(0.0, min(guidance, 2.0))
-        steps = max(1, min(steps, 6))
-    generator = _seed_to_generator(seed)
-    # Do the inference
-    with torch.inference_mode():
-        out = pipe_to_use(
-            prompt=prompt,
-            negative_prompt=negative or DEFAULT_NEGATIVE,
             width=width,
             height=height,
             num_inference_steps=steps,
-            guidance_scale=guidance,
-            generator=generator,
-            num_images_per_prompt=images_per_prompt,
         )
-    images: List[Image.Image] = out.images
     return images
-# ---- Gradio UI ------------------------------------------------------------
-def ui_predict(
-    model_key: str,
-    prompt: str,
-    negative: str,
-    width: int,
-    height: int,
-    steps: int,
-    guidance: float,
-    images_per_prompt: int,
-    seed: int,
-    use_lcm: bool,
-):
-    seed_val = None if seed in (-1, None) else seed
-    imgs = run_infer(
-        model_key=model_key,
-        prompt=prompt.strip(),
-        negative=negative.strip(),
-        width=width,
-        height=height,
-        steps=steps,
-        guidance=guidance,
-        images_per_prompt=images_per_prompt,
-        seed=seed_val,
-        use_lcm=use_lcm,
-    )
-    return imgs
-with gr.Blocks(title=SPACE_TITLE, fill_height=True) as demo:
-    gr.Markdown(f"## {SPACE_TITLE} — SDXL cinematic generator\n"
-                "Paste the prompt built from your Google Sheet "
-                "(**ImagePrompt_Ambience + ImagePrompt_Scenes**) then hit **Run**. "
-                "The API is available at `/api/predict/` for n8n.")
     with gr.Row():
-        model_key = gr.Dropdown(
-            list(MODEL_MAP.keys()),
-            value="SDXL Base 1.0 (stabilityai/stable-diffusion-xl-base-1.0)",
-            label="Model",
-        )
         use_lcm = gr.Checkbox(value=False, label="Use LCM Scheduler (faster)")
     prompt = gr.Textbox(
         label="Prompt (Ambience + 5 Scenes; literal dialogue allowed)",
-        placeholder="e.g., Color theme: Mizu blue… stylized dialogue bubbles (blank)…",
-        lines=10,
     )
     negative = gr.Textbox(
-        label="Negative prompt",
         value=DEFAULT_NEGATIVE,
-        lines=2,
     )
     with gr.Row():
-        width = gr.Slider(640, 1536, value=1024, step=64, label="Width")
-        height = gr.Slider(768, 1664, value=1344, step=64, label="Height")
     with gr.Row():
         steps = gr.Slider(1, 60, value=28, step=1, label="Steps")
-        guidance = gr.Slider(0.0, 12.0, value=6.5, step=0.1, label="Guidance (CFG)")
-        images_per_prompt = gr.Slider(1, 5, value=3, step=1, label="Images per prompt")
         seed = gr.Number(value=-1, precision=0, label="Seed (-1=random)")
     run_btn = gr.Button("Run", variant="primary")
-    gallery = gr.Gallery(label="Output", columns=5, height=480)
     run_btn.click(
-        ui_predict,
-        inputs=[model_key, prompt, negative, width, height, steps, guidance, images_per_prompt, seed, use_lcm],
         outputs=[gallery],
-        api_name="predict",  # enables /api/predict
     )
-if __name__ == "__main__":
-    demo.launch()

 import os
+import io
+import re
+import json
+import base64
+import random
+from typing import List, Tuple, Optional
 import gradio as gr
+import numpy as np
 from PIL import Image
+import torch
 from diffusers import (
     StableDiffusionXLPipeline,
+    AutoPipelineForText2Image,
     LCMScheduler,
 )
+# ---------- Google Sheets helpers ----------
+# Sheet location and column names come from environment variables (Space secrets).
+# Every value is stripped: _header_map() strips the sheet's header cells, so a
+# column name with stray whitespace in the environment would never match.
+SHEET_ID = os.getenv("SHEET_ID", "").strip()
+SHEET_NAME = os.getenv("SHEET_NAME", "Pipeline").strip()
+AMBIENCE_COL = os.getenv("AMBIENCE_COL", "ImagePrompt_Ambience").strip()
+SCENES_COL = os.getenv("SCENES_COL", "ImagePrompt_Scenes").strip()
+def _get_ws():
+    """
+    Return the gspread worksheet named SHEET_NAME inside spreadsheet SHEET_ID.
+
+    Credentials are read from the GOOGLE_CREDENTIALS_JSON environment variable,
+    which may hold either the raw service-account JSON or a base64 encoding of it.
+
+    Raises:
+        RuntimeError: if SHEET_ID or GOOGLE_CREDENTIALS_JSON is unset, the Google
+            client libraries cannot be imported, or the credentials value cannot
+            be decoded into JSON.
+    """
+    if not SHEET_ID:
+        raise RuntimeError("Missing SHEET_ID secret.")
+    raw = os.getenv("GOOGLE_CREDENTIALS_JSON", "").strip()
+    if not raw:
+        raise RuntimeError("Missing GOOGLE_CREDENTIALS_JSON secret.")
+    try:
+        import gspread
+        from google.oauth2.service_account import Credentials
+    except ImportError as e:
+        raise RuntimeError("Google dependencies missing: " + str(e)) from e
+    try:
+        # Accept either raw JSON or base64
+        if not raw.startswith("{"):
+            raw = base64.b64decode(raw).decode("utf-8")
+        info = json.loads(raw)
+    except ValueError as e:
+        # binascii.Error, UnicodeDecodeError and json.JSONDecodeError all derive
+        # from ValueError; report them as one configuration problem.
+        raise RuntimeError(
+            "GOOGLE_CREDENTIALS_JSON is neither valid JSON nor base64-encoded JSON."
+        ) from e
+    scopes = [
+        "https://www.googleapis.com/auth/spreadsheets",
+        "https://www.googleapis.com/auth/drive",
+    ]
+    creds = Credentials.from_service_account_info(info, scopes=scopes)
+    gc = gspread.authorize(creds)
+    sh = gc.open_by_key(SHEET_ID)
+    return sh.worksheet(SHEET_NAME)
+def _header_map(ws) -> dict:
+    """Map each stripped header cell in row 1 to its 1-based column number."""
+    titles = [cell.strip() for cell in ws.row_values(1)]
+    columns = {}
+    for col_no, title in enumerate(titles, start=1):
+        # A repeated title keeps the right-most column, as dict assignment overwrites.
+        columns[title] = col_no
+    return columns
+def pull_row_from_sheet(row_number: int) -> str:
+    """
+    Read one data row from the worksheet and build the prompt text.
+
+    Rows are 1-based and row 1 holds the headers, so the first valid data row
+    is 2. The result is the ambience cell followed by the scenes cell,
+    separated by a blank line; if only one of them is filled, that one is
+    returned alone.
+
+    Raises:
+        RuntimeError: if row_number is below 2, a required column is missing
+            from the header row, or both cells are empty.
+    """
+    # Reject the header row (and invalid indices) before touching the network;
+    # otherwise row 1 would hand back the column names as a "prompt".
+    if row_number < 2:
+        raise RuntimeError(
+            f"Row number must be 2 or greater (row 1 is the header); got {row_number}."
+        )
+    ws = _get_ws()
+    hdr = _header_map(ws)
+    if AMBIENCE_COL not in hdr or SCENES_COL not in hdr:
+        raise RuntimeError(
+            f"Sheet is missing required columns: '{AMBIENCE_COL}' and/or '{SCENES_COL}'."
+        )
+    values = ws.row_values(row_number)
+    def _cell(col: str) -> str:
+        # The returned row may be shorter than the header row; treat a
+        # missing cell as blank.
+        idx = hdr[col] - 1
+        return (values[idx] if idx < len(values) else "") or ""
+    ambience = _cell(AMBIENCE_COL).strip()
+    scenes = _cell(SCENES_COL).strip()
+    if not ambience and not scenes:
+        raise RuntimeError("Row has empty ambience and scenes.")
+    if ambience and scenes:
+        return ambience + "\n\n" + scenes
+    return ambience or scenes
+# ---------- Prompt parsing ----------
+# A scene heading is "Scene N" (N = 1..5) at the start of a line, optionally
+# followed by a dash or colon. (?!\d) stops "Scene 10" from being read as
+# "Scene 1" followed by a stray "0".
+SCENE_SPLIT_RE = re.compile(
+    r"(?:^|\n)\s*Scene\s*[1-5](?!\d)\s*(?:–|-|—|:)?\s*", re.IGNORECASE
+)
+def split_into_scenes(full_text: str) -> List[str]:
+    """
+    Split a long prompt into up to 5 scene blocks by 'Scene 1 ... Scene 5' headings.
+
+    The heading itself (and its dash/colon separator) is removed from each
+    block. Text before the first heading is not part of any block. If no
+    heading is found, the entire stripped text is returned as a single scene
+    (or an empty list when the text is blank).
+    """
+    matches = list(SCENE_SPLIT_RE.finditer(full_text))
+    if not matches:
+        whole = full_text.strip()
+        return [whole] if whole else []
+    segments = []
+    for i, m in enumerate(matches):
+        # A scene body runs from the end of its heading to the next heading.
+        start = m.end()
+        end = matches[i + 1].start() if i + 1 < len(matches) else len(full_text)
+        chunk = full_text[start:end].strip()
+        if chunk:
+            segments.append(chunk)
+    # Limit to first 5 segments
+    return segments[:5]
+def attach_ambience(ambience: str, scene_texts: List[str]) -> List[str]:
+    """
+    Return one prompt per scene, each prefixed by the shared ambience text.
+
+    Scenes are stripped. When the ambience is blank the stripped scenes are
+    returned on their own; otherwise ambience and scene are joined by a
+    blank line.
+    """
+    style = ambience.strip()
+    if not style:
+        return [scene.strip() for scene in scene_texts]
+    return [style + "\n\n" + scene.strip() for scene in scene_texts]
+def parse_manual_prompt(long_text: str) -> Tuple[str, List[str]]:
+    """
+    Separate the ambience preamble from the scene list.
+
+    Everything before the first 'Scene 1' heading is the ambience; the text
+    from that heading onward is handed to split_into_scenes(). Without such a
+    heading the ambience is empty and the whole stripped text is the only
+    scene (no scenes at all when the text is blank).
+    Return (ambience, scenes_list).
+    """
+    first_heading = re.search(r"(?:^|\n)\s*Scene\s*1\b", long_text, flags=re.IGNORECASE)
+    if first_heading is None:
+        whole = long_text.strip()
+        return ("", [whole] if whole else [])
+    cut = first_heading.start()
+    preamble = long_text[:cut].strip()
+    return (preamble, split_into_scenes(long_text[cut:]))
+# ---------- Diffusers model loading ----------
+# Use CUDA with half precision when a GPU is present; otherwise run on CPU in float32.
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
+DEFAULT_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
+# NOTE(review): despite the REAL_XL name, this repo id looks like a Stable Diffusion 1.5
+# checkpoint rather than an SDXL one (the previous revision of this file used
+# "SG161222/RealVisXL_V4.0") — confirm this is the intended model.
+REAL_XL = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
+TURBO = "stabilityai/sdxl-turbo"
+# Loaded pipelines, keyed by (model_id, use_lcm); filled by load_pipeline().
+PIPE_CACHE = {}
+def load_pipeline(model_id: str, use_lcm: bool):
+    """
+    Load a text-to-image pipeline for model_id, move it to DEVICE and cache it.
+
+    The cache key is (model_id, use_lcm), so the LCM and non-LCM variants of a
+    model are separate entries. AutoPipelineForText2Image is tried first; if
+    it raises, StableDiffusionXLPipeline is tried with the same model id.
+    When use_lcm is set, the scheduler is swapped for LCMScheduler; if that
+    swap fails the default scheduler is kept and a warning is logged, rather
+    than the request being dropped silently.
+
+    Raises:
+        Whatever the fallback StableDiffusionXLPipeline loader raises when
+        both loaders fail.
+    """
+    import logging  # local import keeps this block self-contained
+
+    log = logging.getLogger(__name__)
+    key = (model_id, use_lcm)
+    if key in PIPE_CACHE:
+        return PIPE_CACHE[key]
+    try:
+        pipe = AutoPipelineForText2Image.from_pretrained(
+            model_id, torch_dtype=DTYPE
+        )
+    except Exception as auto_err:
+        # Keep the first failure visible; the fallback may fail for a different reason.
+        log.warning(
+            "AutoPipelineForText2Image failed for %s (%s); trying StableDiffusionXLPipeline.",
+            model_id,
+            auto_err,
+        )
+        pipe = StableDiffusionXLPipeline.from_pretrained(
+            model_id, torch_dtype=DTYPE
+        )
+    if use_lcm:
+        try:
+            pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
+        except Exception as lcm_err:
+            # Best effort: keep the default scheduler, but say so.
+            log.warning(
+                "LCM scheduler not applied to %s (%s); using the default scheduler.",
+                model_id,
+                lcm_err,
+            )
+    pipe.to(DEVICE)
+    PIPE_CACHE[key] = pipe
     return pipe
+# ---------- Generation ----------
+DEFAULT_NEGATIVE = (
+    "text, watermark, signature, logo, jpeg artifacts, lowres, blurry, oversharp, "
+    "deformed, extra fingers, extra limbs, bad hands, bad anatomy, duplicate, worst quality"
+)
+def to_multiple_of_64(x: int) -> int:
+    """
+    Round x to the nearest multiple of 64, with a minimum of 64.
+
+    Exact midpoints always round up. The built-in round() rounds halves to
+    the nearest even integer, which sent 672 down to 640 but 736 up to 768.
+    """
+    return max(64, int((x + 32) // 64) * 64)
+@torch.inference_mode()
+def generate_images_for_scenes(
+    model_id: str,
+    use_lcm: bool,
+    ambience_and_scenes_text: str,
+    negative_prompt: str,
     width: int,
     height: int,
     steps: int,
+    cfg: float,
+    seed: int,
 ) -> List[Image.Image]:
+    """
+    Render one image per scene found in the combined prompt text (5 at most).
+
+    The text is split into an ambience preamble and 'Scene N' blocks; each
+    scene is prefixed with the ambience and rendered in turn. Width and height
+    are snapped to multiples of 64. A seed of None or any negative value picks
+    a random seed; a single generator is shared by all scenes of the call.
+    An empty negative_prompt falls back to DEFAULT_NEGATIVE.
+
+    Raises:
+        ValueError: if the prompt text is empty or whitespace only.
+    """
+    # Refuse blank input up front instead of rendering an image from an empty prompt.
+    if not (ambience_and_scenes_text or "").strip():
+        raise ValueError("Prompt is empty.")
+    ambience, scenes = parse_manual_prompt(ambience_and_scenes_text)
+    if not scenes:
+        # Headings were present but every scene body was blank:
+        # treat entire text as one scene
+        scenes = [ambience_and_scenes_text.strip()]
+        ambience = ""
+    scenes = scenes[:5]
+    prompts = attach_ambience(ambience, scenes)
+    width = to_multiple_of_64(width)
+    height = to_multiple_of_64(height)
+    if seed is None or seed < 0:
+        seed = random.randint(0, 2**31 - 1)
+    gen = torch.Generator(device=DEVICE).manual_seed(int(seed))
+    pipe = load_pipeline(model_id, use_lcm)
+    images = []
+    for ptxt in prompts:
+        # SDXL Turbo prefers low steps; we still honor the UI value
+        out = pipe(
+            prompt=ptxt,
+            negative_prompt=negative_prompt or DEFAULT_NEGATIVE,
             width=width,
             height=height,
             num_inference_steps=steps,
+            guidance_scale=cfg,
+            generator=gen,
         )
+        # One image per scene: keep the first image of each call.
+        images.append(out.images[0])
     return images
+# ---------- Gradio UI + API ----------
+MODEL_CHOICES = [
+    DEFAULT_MODEL,
+    TURBO,
+    REAL_XL,
+]
+with gr.Blocks(title="Loomvale Image Lab") as demo:
+    gr.Markdown("## Loomvale Image Lab — SDXL cinematic generator")
     with gr.Row():
+        model = gr.Dropdown(MODEL_CHOICES, value=DEFAULT_MODEL, label="Model")
         use_lcm = gr.Checkbox(value=False, label="Use LCM Scheduler (faster)")
+    with gr.Row():
+        sheet_row = gr.Number(value=2, precision=0, label="Sheet row (1-based)")
+        pull_btn = gr.Button("Pull from Google Sheet")
     prompt = gr.Textbox(
+        lines=12,
         label="Prompt (Ambience + 5 Scenes; literal dialogue allowed)",
+        placeholder='e.g., Color theme: Mizu blue… stylized dialogue bubbles (blank)…',
     )
     negative = gr.Textbox(
         value=DEFAULT_NEGATIVE,
+        label="Negative prompt",
     )
     with gr.Row():
+        width = gr.Slider(640, 1536, value=1024, step=1, label="Width")
+        height = gr.Slider(768, 1664, value=1344, step=1, label="Height")
     with gr.Row():
         steps = gr.Slider(1, 60, value=28, step=1, label="Steps")
+        cfg = gr.Slider(0.0, 12.0, value=6.5, step=0.1, label="Guidance (CFG)")
         seed = gr.Number(value=-1, precision=0, label="Seed (-1=random)")
     run_btn = gr.Button("Run", variant="primary")
+    gallery = gr.Gallery(label="Output", columns=5, rows=1, height=420)
+    # Pull handler
+    def on_pull(rownum: float):
+        """
+        Load the prompt text for a sheet row into the prompt textbox.
+
+        Returns a single update for the one wired output. Success is reported
+        with a gr.Info toast; any failure is raised as gr.Error so that Gradio
+        shows it to the user.
+        """
+        try:
+            r = int(rownum)
+            txt = pull_row_from_sheet(r)
+        except Exception as e:
+            # gr.Error is an exception class: it only reaches the UI when raised.
+            raise gr.Error(str(e)) from e
+        # gr.Info displays its toast as a side effect; it is not an output value.
+        gr.Info(f"Loaded row {r} from '{SHEET_NAME}'.")
+        return gr.update(value=txt)
+    pull_btn.click(on_pull, inputs=[sheet_row], outputs=[prompt])
+    # Run handler
+    def on_run(model, use_lcm, prompt_text, negative_text, width_v, height_v, steps_v, cfg_v, seed_v):
+        """
+        Generate the scene images for the current UI settings.
+
+        Returns the list of images for the gallery. Any failure is raised as
+        gr.Error so the message is shown in the UI instead of being replaced by
+        an empty gallery.
+        """
+        try:
+            return generate_images_for_scenes(
+                model_id=model,
+                use_lcm=bool(use_lcm),
+                ambience_and_scenes_text=prompt_text or "",
+                negative_prompt=negative_text or DEFAULT_NEGATIVE,
+                width=int(width_v),
+                height=int(height_v),
+                steps=int(steps_v),
+                cfg=float(cfg_v),
+                # A cleared seed box arrives as None; treat it as "random".
+                seed=-1 if seed_v is None else int(seed_v),
+            )
+        except Exception as e:
+            # Constructing gr.Error without raising it shows nothing to the user.
+            raise gr.Error(str(e)) from e
     run_btn.click(
+        on_run,
+        inputs=[model, use_lcm, prompt, negative, width, height, steps, cfg, seed],
         outputs=[gallery],
     )
+# Lightweight REST API for n8n: POST /api/predict
+from fastapi import FastAPI
+from fastapi.responses import JSONResponse
+rest_app = FastAPI()
+@rest_app.post("/api/predict")
+async def api_predict(payload: dict):
+    """
+    Generate scene images from a JSON payload and return them as base64 PNGs.
+
+    Recognised keys: model, use_lcm, negative_prompt, width, height, steps,
+    cfg, seed, prompt, sheet_row. When "prompt" is empty and "sheet_row" is
+    given, the prompt text is pulled from the Google Sheet.
+
+    Responds with {"images_b64_png": [...], "count": N} on success, or
+    {"error": message} with status 400 on any failure.
+    """
+    from fastapi.concurrency import run_in_threadpool
+
+    def _render() -> list:
+        # Blocking work (sheet access, model inference, PNG encoding) in one place.
+        model_id = payload.get("model", DEFAULT_MODEL)
+        use_lcm = bool(payload.get("use_lcm", False))
+        neg = payload.get("negative_prompt", DEFAULT_NEGATIVE)
+        w = int(payload.get("width", 1024))
+        h = int(payload.get("height", 1344))
+        steps_v = int(payload.get("steps", 28))
+        cfg_v = float(payload.get("cfg", 6.5))
+        seed_v = int(payload.get("seed", -1))
+        # Either prompt text or a sheet row
+        text = payload.get("prompt", "") or ""
+        sheet_row_req = payload.get("sheet_row")
+        if (not text) and sheet_row_req:
+            text = pull_row_from_sheet(int(sheet_row_req))
+        if not text.strip():
+            raise ValueError("Provide a non-empty 'prompt' or a 'sheet_row'.")
+        imgs = generate_images_for_scenes(
+            model_id=model_id,
+            use_lcm=use_lcm,
+            ambience_and_scenes_text=text,
+            negative_prompt=neg,
+            width=w,
+            height=h,
+            steps=steps_v,
+            cfg=cfg_v,
+            seed=seed_v,
+        )
+        # Encode each image as a base64 PNG string for n8n convenience.
+        encoded = []
+        for im in imgs:
+            bio = io.BytesIO()
+            im.save(bio, format="PNG")
+            encoded.append(base64.b64encode(bio.getvalue()).decode("utf-8"))
+        return encoded
+    try:
+        # Run off the event loop so a long generation does not stall other requests.
+        b64s = await run_in_threadpool(_render)
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=400)
+    return JSONResponse({"images_b64_png": b64s, "count": len(b64s)})
+# Mount Gradio last: a mount at "/" matches every path, so a route added to the
+# app after mounting would never be reached.
+app = gr.mount_gradio_app(rest_app, demo, path="/")