# trilochan's picture
# Update app.py
# 5722a4c verified
import os
import re
from dataclasses import dataclass, field
from typing import Dict, Any, Optional, Tuple, List

import gradio as gr
import numpy as np
from PIL import Image, ImageStat

# Optional imports for reverse-prompting
try:
    import cv2
except Exception:
    cv2 = None
try:
    from transformers import pipeline
    HAS_TRANSFORMERS = True
except Exception:
    HAS_TRANSFORMERS = False
# Title used for the Gradio Blocks page and for the heading of the intro Markdown.
APP_TITLE = "Ultra‑Realistic Prompt Builder"
# Default negative prompt: the initial value of the negative-prompt fields and
# the fallback whenever a negative prompt is left empty.
NEGATIVE_BASELINE = (
"cgi, 3d render, cartoon, illustration, plastic/waxy skin, overprocessed, oversharpened halos, "
"lowres, noise, banding, posterization, watermark, text, logo, bad anatomy, extra fingers, "
"deformed hands, blurry, depth map artifacts, harsh HDR, unrealistic colors"
)
@dataclass
class CameraSpec:
    """Camera parameters that can be mentioned in a prompt.

    Every field is optional: text fields default to an empty string and
    numeric fields to ``None``, both meaning "not specified".
    """

    # Camera body / model name.
    cameraBody: str = ""
    # Lens focal length in millimetres.
    focalLengthMm: Optional[int] = None
    # Aperture as free text, e.g. "f/1.8".
    aperture: str = ""
    # ISO sensitivity.
    iso: Optional[int] = None
@dataclass
class PromptFields:
    """All inputs needed to build a prompt, plus per-model generation settings.

    Text fields default to an empty string, which the prompt builder treats as
    "omit this part".
    """

    subject: str = ""
    environment: str = ""
    timeWeather: str = ""
    # A dataclass instance is unhashable and therefore rejected as a plain
    # default (ValueError on Python 3.11+); a factory also guarantees every
    # PromptFields gets its own CameraSpec instead of a shared one.
    camera: CameraSpec = field(default_factory=CameraSpec)
    composition: str = ""
    lighting: str = ""
    microDetails: str = ""
    motionAtmosphere: str = ""
    colorGrade: str = ""
    # When True the fixed realism-cue sentence is appended to the prompt.
    realismCues: bool = True
    aspectRatio: str = "4:5"
    negatives: str = NEGATIVE_BASELINE
    model: str = "sdxl"  # "mj" | "sdxl" | "dalle"
    # MJ
    settings_mj_s: int = 100
    settings_mj_chaos: int = 5
    settings_mj_seed: int = 42
    # SDXL
    settings_sdxl_steps: int = 34
    settings_sdxl_cfg: int = 5
    settings_sdxl_sampler: str = "DPM++ SDE Karras"
    settings_sdxl_resolution: str = "1024x1280"
    settings_sdxl_refiner: float = 0.25
    # DALL·E
    settings_dalle_resolution: str = "1024x1024"
def realism_string(enabled: bool) -> str:
    """Return the fixed realism-cue sentence, or an empty string when disabled."""
    cues = (
        "Photorealistic, true-to-life colors, subsurface scattering, global illumination, "
        "soft shadows, accurate reflections, natural skin, shallow DOF, film grain 3–5%, "
        "subtle chromatic aberration, vignette."
    )
    return cues if enabled else ""
def safe_join(parts: List[str]) -> str:
    """Join the non-empty parts with single spaces.

    Falsy and whitespace-only parts are skipped, each remaining part is
    stripped, and runs of two or more spaces inside the result are collapsed
    to one.

    Args:
        parts: Sentence fragments; ``None`` and empty strings are ignored.

    Returns:
        The joined text, or ``""`` when nothing remains.
    """
    # Normalise through str() once so the emptiness test and the strip apply
    # to the same value.
    stripped = (str(p).strip() for p in parts if p)
    joined = " ".join(s for s in stripped if s)
    # Collapse space runs of any length left inside individual fragments.
    return re.sub(r" {2,}", " ", joined).strip()
def build_universal(f: PromptFields) -> str:
    """Build the model-agnostic prompt text from the filled-in fields.

    The prompt is a sequence of short sentences (subject/setting, camera,
    composition, lighting, micro-detail, motion, colour grade, realism cues);
    any sentence whose source field is empty is left out.

    Args:
        f: The prompt fields to render.

    Returns:
        The sentences joined with single spaces.
    """
    opening = f"Photo of {f.subject}" if f.subject else "Photo"
    if f.environment:
        opening += f" in/at {f.environment}"
    if f.timeWeather:
        opening += f", {f.timeWeather}"
    opening += "."

    cam = f.camera
    body = (cam.cameraBody or "").strip() if cam else ""
    aperture = cam.aperture if cam else ""
    iso_text = f"ISO {cam.iso}" if cam and cam.iso else ""

    # "Gear" items are nouns that can follow "Shot with a ..."; the camera body
    # is included so the value entered by the user is not silently dropped.
    gear = []
    if body:
        gear.append(body)
    if cam and cam.focalLengthMm:
        gear.append(f"{cam.focalLengthMm}mm lens")

    if gear:
        bits = list(gear)
        if aperture:
            bits.append(f"at {aperture}")
        if iso_text:
            bits.append(iso_text)
        camera_sentence = "Shot with a " + ", ".join(bits) + "."
    elif aperture or iso_text:
        # Without a body or lens there is no noun for "with a", so phrase the
        # exposure settings on their own ("Shot at f/1.8, ISO 200.").
        exposure = [bit for bit in (aperture, iso_text) if bit]
        camera_sentence = "Shot at " + ", ".join(exposure) + "."
    else:
        camera_sentence = ""

    sentences = [
        opening,
        camera_sentence,
        f"{f.composition}." if f.composition else "",
        f"Lighting: {f.lighting}." if f.lighting else "",
        f"Materials & micro-detail: {f.microDetails}." if f.microDetails else "",
        f"Motion/atmosphere: {f.motionAtmosphere}." if f.motionAtmosphere else "",
        f"Color & grade: {f.colorGrade}." if f.colorGrade else "",
        realism_string(f.realismCues),
    ]
    return safe_join(sentences)
def format_midjourney(universal: str, f: PromptFields) -> str:
    """Append the Midjourney command-line flags to the universal prompt."""
    flags = " ".join(
        [
            "--style raw",
            f"--ar {f.aspectRatio}",
            f"--s {f.settings_mj_s}",
            f"--chaos {f.settings_mj_chaos}",
            f"--seed {f.settings_mj_seed}",
        ]
    )
    return f"{universal} {flags}"
def format_sdxl(universal: str, f: PromptFields) -> Dict[str, Any]:
    """Package the prompt for SDXL: positive text, negative text and settings.

    An empty negative prompt falls back to ``NEGATIVE_BASELINE``.
    """
    settings: Dict[str, Any] = {}
    settings["steps"] = f.settings_sdxl_steps
    settings["cfg"] = f.settings_sdxl_cfg
    settings["sampler"] = f.settings_sdxl_sampler
    settings["resolution"] = f.settings_sdxl_resolution
    settings["refiner"] = f.settings_sdxl_refiner
    settings["tips"] = "Use SDXL Refiner at 0.2–0.4 denoise; Upscale 1.5–2.0x for micro-detail"

    negative = f.negatives if f.negatives else NEGATIVE_BASELINE
    return {"positive": universal, "negative": negative, "settings": settings}
def format_dalle(universal: str, f: PromptFields) -> Dict[str, Any]:
    """Wrap the universal prompt as photographic prose for DALL·E, with the resolution."""
    result: Dict[str, Any] = {}
    result["prompt"] = "A high‑resolution photograph. {}".format(universal)
    result["resolution"] = f.settings_dalle_resolution
    return result
# First signed decimal number embedded in free text, e.g. "85" in "85mm".
_NUMBER_RE = re.compile(r"-?\d+(?:\.\d+)?")


def _coerce_number(value, cast, default):
    """Convert a UI value to a number with ``cast``, or return ``default``.

    Empty/None values give ``default``. Values ``cast`` accepts directly are
    converted as-is; otherwise the first number found in the text is used
    (so "85mm" -> 85 and "85.0" -> 85), and ``default`` is returned when the
    text contains no number at all.
    """
    text = "" if value is None else str(value).strip()
    if not text:
        return default
    try:
        return cast(value)
    except (TypeError, ValueError, OverflowError):
        pass
    found = _NUMBER_RE.search(text)
    if found is None:
        return default
    try:
        return cast(float(found.group(0)))
    except (ValueError, OverflowError):
        return default


def compose(
    subject, environment, timeWeather,
    cameraBody, focalLengthMm, aperture, iso,
    composition, lighting, microDetails, motionAtmosphere, colorGrade,
    realismCues, aspectRatio, negatives,
    mj_s, mj_chaos, mj_seed,
    sdxl_steps, sdxl_cfg, sdxl_sampler, sdxl_resolution, sdxl_refiner,
    dalle_resolution
) -> Tuple[str, str, Dict[str, Any], Dict[str, Any], str]:
    """Build every prompt variant from the raw form values.

    Empty text values fall back to the same defaults the form starts with.
    Numeric values are parsed tolerantly: a blank, ``None`` or non-numeric
    entry yields the default instead of raising, so a stray "85mm" in the
    focal-length box no longer aborts prompt generation.

    Returns:
        ``(universal, midjourney, sdxl, dalle, negative)`` where ``sdxl`` and
        ``dalle`` are dicts and the rest are strings.
    """
    f = PromptFields(
        subject=subject or "",
        environment=environment or "",
        timeWeather=timeWeather or "",
        camera=CameraSpec(
            cameraBody=cameraBody or "",
            focalLengthMm=_coerce_number(focalLengthMm, int, None),
            aperture=aperture or "",
            iso=_coerce_number(iso, int, None),
        ),
        composition=composition or "",
        lighting=lighting or "",
        microDetails=microDetails or "",
        motionAtmosphere=motionAtmosphere or "",
        colorGrade=colorGrade or "",
        realismCues=bool(realismCues),
        aspectRatio=aspectRatio or "4:5",
        negatives=negatives or NEGATIVE_BASELINE,
        settings_mj_s=_coerce_number(mj_s, int, 100),
        settings_mj_chaos=_coerce_number(mj_chaos, int, 5),
        settings_mj_seed=_coerce_number(mj_seed, int, 42),
        settings_sdxl_steps=_coerce_number(sdxl_steps, int, 34),
        settings_sdxl_cfg=_coerce_number(sdxl_cfg, int, 5),
        settings_sdxl_sampler=sdxl_sampler or "DPM++ SDE Karras",
        settings_sdxl_resolution=sdxl_resolution or "1024x1280",
        settings_sdxl_refiner=_coerce_number(sdxl_refiner, float, 0.25),
        settings_dalle_resolution=dalle_resolution or "1024x1024",
    )
    universal = build_universal(f)
    mj = format_midjourney(universal, f)
    sdxl = format_sdxl(universal, f)
    dalle = format_dalle(universal, f)
    return universal, mj, sdxl, dalle, (f.negatives or NEGATIVE_BASELINE)
# ---------- Reverse prompt helpers ----------
CAPTIONER = None


def init_captioner():
    """Create the BLIP image-captioning pipeline, or return None if unavailable.

    Returns None when transformers could not be imported or when building the
    pipeline fails for any reason.
    """
    if HAS_TRANSFORMERS:
        try:
            return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
        except Exception:
            # Best-effort: fall through and report the captioner as unavailable.
            pass
    return None


# Built once at import time; None means captions are unavailable.
CAPTIONER = init_captioner()
OBJDET = None


def init_objdet():
    """Create the DETR object-detection pipeline, or return None if unavailable.

    Returns None when transformers could not be imported or when building the
    pipeline fails for any reason.
    """
    if HAS_TRANSFORMERS:
        try:
            return pipeline("object-detection", model="facebook/detr-resnet-50")
        except Exception:
            # Best-effort: fall through and report the detector as unavailable.
            pass
    return None


# Built once at import time; None means object detection is unavailable.
OBJDET = init_objdet()
def download_haarcascade() -> Optional[str]:
    """Return a path to the frontal-face Haar cascade XML, or None.

    Lookup order:
      1. a copy already present in the working directory (earlier download),
      2. the copy exposed by the cv2 module through ``cv2.data.haarcascades``
         (no network needed),
      3. a fresh download into the working directory.

    Returns None when OpenCV is not installed or no cascade can be obtained.
    """
    if cv2 is None:
        return None
    fname = "haarcascade_frontalface_default.xml"
    if os.path.exists(fname):
        return fname

    # Not every OpenCV build exposes cv2.data, hence the defensive getattr.
    bundled_dir = getattr(getattr(cv2, "data", None), "haarcascades", None)
    if bundled_dir:
        bundled = os.path.join(bundled_dir, fname)
        if os.path.exists(bundled):
            return bundled

    url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml"
    try:
        # Imported inside the try so a missing `requests` package degrades to
        # "no cascade" instead of raising out of the caller.
        import requests

        r = requests.get(url, timeout=15)
        r.raise_for_status()
        # Write to a temporary name and rename, so an interrupted write never
        # leaves a truncated file that later calls would treat as valid.
        partial = fname + ".part"
        with open(partial, "wb") as fh:
            fh.write(r.content)
        os.replace(partial, fname)
        return fname
    except Exception:
        # Best-effort boundary: face detection is optional.
        return None
def detect_faces(pil_img: Image.Image) -> int:
    """Count frontal faces in the image with a Haar cascade.

    Returns 0 when OpenCV or the cascade file is unavailable, or when
    detection raises.
    """
    if cv2 is None:
        return 0
    cascade_path = download_haarcascade()
    if not cascade_path:
        return 0
    try:
        rgb = np.array(pil_img.convert("RGB"))
        grayscale = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
        classifier = cv2.CascadeClassifier(cascade_path)
        # Positional arguments: scale factor 1.1, minimum neighbours 4.
        found = classifier.detectMultiScale(grayscale, 1.1, 4)
        if found is None:
            return 0
        return len(found)
    except Exception:
        return 0
def avg_brightness(pil_img: Image.Image) -> float:
    """Return the mean pixel value of the image after conversion to mode "L"."""
    grayscale = pil_img.convert("L")
    band_means = ImageStat.Stat(grayscale).mean
    return float(band_means[0])
def nearest_aspect(w: int, h: int) -> str:
    """Return the common aspect-ratio label closest to ``w / h``.

    Both landscape and portrait ratios are considered, so a 1080x1920 image
    maps to "9:16" rather than being forced to "4:5".

    Args:
        w: Width in pixels.
        h: Height in pixels.

    Returns:
        A label such as "16:9". Non-positive dimensions return "4:5".
    """
    if w <= 0 or h <= 0:
        # Degenerate size: avoid dividing by zero and use a neutral fallback.
        return "4:5"
    target = w / h
    # Exact fractions rather than truncated decimals such as 1.333 / 1.777.
    candidates = {
        "1:1": 1.0,
        "4:5": 4 / 5,
        "5:4": 5 / 4,
        "4:3": 4 / 3,
        "3:2": 3 / 2,
        "16:9": 16 / 9,
        "3:4": 3 / 4,
        "2:3": 2 / 3,
        "9:16": 9 / 16,
    }
    return min(candidates, key=lambda label: abs(candidates[label] - target))
def _article(word: str) -> str:
return "an" if word and word[0].lower() in "aeiou" else "a"
def _label_to_phrase(label: str) -> str:
    """Turn a detector label into a short noun phrase with an article.

    A few labels are replaced by friendlier wording first (e.g. "tv" becomes
    "television"); all other labels are used unchanged.
    """
    friendly = {
        "tv": "television",
        "cell phone": "phone",
        "sports ball": "ball",
        "potted plant": "potted plant",
    }
    noun = friendly[label] if label in friendly else label
    return f"{_article(noun)} {noun}"
def _centrality_score(cx, cy, W, H):
dx = abs(cx - W/2) / (W/2)
dy = abs(cy - H/2) / (H/2)
dist = min(1.0, (dx*dx + dy*dy) ** 0.5)
return 1.0 - dist
def _detect_main_subject(img: Image.Image):
    """Pick the most prominent detected object and up to five alternatives.

    Each detection is scored as ``confidence * (0.6 * relative box area +
    0.4 * centrality)``; the best-scoring label becomes the main subject.

    Returns:
        ``(main_phrase, suggestions)``: ``main_phrase`` is a phrase such as
        "a dog" and ``suggestions`` holds up to five distinct phrases in
        descending score order. ``(None, [])`` when the detector is
        unavailable, raises, or finds nothing.
    """
    if OBJDET is None:
        return None, []
    try:
        detections = OBJDET(img)
    except Exception:
        return None, []
    if not detections:
        return None, []

    frame_w, frame_h = img.size

    def _scored(det):
        # Missing box coordinates default to 0; width/height are floored at 1.
        box = det.get("box", {})
        left, top = box.get("xmin", 0), box.get("ymin", 0)
        right, bottom = box.get("xmax", 0), box.get("ymax", 0)
        box_w, box_h = max(1, right - left), max(1, bottom - top)
        coverage = (box_w * box_h) / float(frame_w * frame_h)
        centre_x, centre_y = left + box_w / 2, top + box_h / 2
        centrality = _centrality_score(centre_x, centre_y, frame_w, frame_h)
        confidence = float(det.get("score", 0.0))
        return {
            "label": det.get("label", ""),
            "score": confidence * (0.6 * coverage + 0.4 * centrality),
        }

    ranked = sorted(
        (_scored(det) for det in detections),
        key=lambda entry: entry["score"],
        reverse=True,
    )
    main_phrase = _label_to_phrase(ranked[0]["label"])

    # Distinct phrases in rank order, capped at five.
    suggestions = []
    for entry in ranked:
        phrase = _label_to_phrase(entry["label"])
        if phrase in suggestions:
            continue
        suggestions.append(phrase)
        if len(suggestions) >= 5:
            break
    return main_phrase, suggestions
def _action_from_caption(caption: str) -> str:
c = (caption or "").lower()
for key in ["running", "sprinting", "walking", "standing", "jumping", "riding", "driving", "sitting"]:
if key in c:
return key
return ""
# Article + noun phrase at the start of a clause, ending before the first
# preposition, comma, period or end of text. The leading \b keeps the article
# from matching the last letter of a word (the "a" in "pizza on ...").
_CAPTION_SUBJECT_RE = re.compile(
    r"\b(a|an|the)\s+([^,.]+?)(?:\s+(on|in|at|by|with|near|amid|from)\b|[.,]|$)"
)


def _subject_from_caption(caption: str) -> str:
    """Return the leading "<article> <noun phrase>" of a caption, or ""."""
    found = _CAPTION_SUBJECT_RE.search((caption or "").lower())
    if not found:
        return ""
    # Use the captured groups only: the whole match would also include the
    # terminating preposition ("a man on").
    return f"{found.group(1)} {found.group(2).strip()}"


def extract_fields_from_image(img: Image.Image) -> Dict[str, Any]:
    """Infer prompt fields from an image using captioning, detection and heuristics.

    The subject is taken from, in order: the best object detection, a detected
    face ("a person"), the caption's leading noun phrase, or the generic
    "a real-world subject". Time/weather is guessed from mean brightness, and
    the remaining fields are fixed presets that differ only between person and
    non-person subjects.

    Returns:
        A dict with the prompt field values, a nested "camera" dict and a
        "subjectCandidates" list of alternative subject phrases.
    """
    caption = ""
    if CAPTIONER:
        try:
            out = CAPTIONER(img)
            if isinstance(out, list) and out:
                caption = out[0].get("generated_text", "")
        except Exception:
            caption = ""

    brightness = avg_brightness(img)
    if brightness > 140:
        timeWeather = "daylight"
    elif brightness > 100:
        timeWeather = "overcast daylight"
    else:
        timeWeather = "night with ambient light"

    subject_phrase, subject_suggestions = _detect_main_subject(img)
    faces = detect_faces(img)
    if not subject_phrase and faces > 0:
        subject_phrase = "a person"
    if not subject_phrase:
        subject_phrase = _subject_from_caption(caption) or "a real-world subject"

    is_person = subject_phrase.startswith(("a person", "an person"))
    if is_person:
        act = _action_from_caption(caption)
        if act and act not in subject_phrase:
            subject_phrase = f"{subject_phrase} {act}"

    # Both timeWeather values for bright images contain "day".
    daylit = "day" in timeWeather
    if is_person:
        composition = "eye‑level, rear three‑quarter or profile, leading lines, shallow DOF"
        micro = "skin pores, fabric textures, scuffs, dust in the air"
        motion = "slight motion blur on limbs if running" if "running" in subject_phrase else "no visible motion blur"
    else:
        composition = "eye‑level, balanced framing, leading lines, shallow DOF"
        micro = "texture of materials, dust, subtle scratches, specular highlights"
        motion = "slight motion blur if present, volumetric light if applicable"

    w, h = img.size
    return {
        "subject": subject_phrase,
        "subjectCandidates": subject_suggestions,
        "environment": "",
        "timeWeather": timeWeather,
        "camera": {
            "cameraBody": "",
            "focalLengthMm": 35,
            "aperture": "f/2.8",
            "iso": 200 if daylit else 800
        },
        "composition": composition,
        "lighting": "soft natural light" if daylit else "mixed ambient light with practicals, soft shadows",
        "microDetails": micro,
        "motionAtmosphere": motion,
        "colorGrade": "neutral, true-to-life colors, gentle contrast, high micro‑contrast",
        "realismCues": True,
        "aspectRatio": nearest_aspect(w, h),
        "negatives": NEGATIVE_BASELINE,
        "model": "sdxl"
    }
def reverse_prompt(image: Image.Image):
    """Analyse an uploaded image and build every prompt variant from it.

    Returns a 7-tuple: extracted fields, universal prompt, Midjourney prompt,
    SDXL dict, DALL·E dict, negative prompt, and an update for the
    detected-subject dropdown. With no image, empty placeholders are returned.
    """
    if image is None:
        blank_sdxl = {"positive": "", "negative": "", "settings": {}}
        blank_dalle = {"prompt": "", "resolution": ""}
        cleared_dropdown = gr.update(choices=[], value=None)
        return {}, "", "", blank_sdxl, blank_dalle, NEGATIVE_BASELINE, cleared_dropdown

    fields = extract_fields_from_image(image)
    cam = fields["camera"]
    camera_spec = CameraSpec(
        cameraBody=cam.get("cameraBody", ""),
        focalLengthMm=cam.get("focalLengthMm", None),
        aperture=cam.get("aperture", ""),
        iso=cam.get("iso", None),
    )
    f = PromptFields(
        subject=fields["subject"],
        environment=fields.get("environment", ""),
        timeWeather=fields.get("timeWeather", ""),
        camera=camera_spec,
        composition=fields.get("composition", ""),
        lighting=fields.get("lighting", ""),
        microDetails=fields.get("microDetails", ""),
        motionAtmosphere=fields.get("motionAtmosphere", ""),
        colorGrade=fields.get("colorGrade", ""),
        realismCues=True,
        aspectRatio=fields.get("aspectRatio", "4:5"),
        negatives=fields.get("negatives", NEGATIVE_BASELINE),
    )

    universal = build_universal(f)
    mj = format_midjourney(universal, f)
    sdxl = format_sdxl(universal, f)
    dalle = format_dalle(universal, f)

    # Offer the detected subjects in the dropdown, preselecting the best one.
    cands = fields.get("subjectCandidates", []) or []
    preselected = cands[0] if cands else None
    dd = gr.update(choices=cands, value=preselected)
    negative_text = fields.get("negatives") or NEGATIVE_BASELINE
    return fields, universal, mj, sdxl, dalle, negative_text, dd
# ---------- Presets ----------
# Built-in presets keyed by the name shown in the "Presets" dropdown.
# Each entry sets the descriptive fields and camera values; the per-model
# settings are not given here, so they take the PromptFields defaults.
PRESETS = {
# Half-body portrait with an 85mm lens in warm window light.
"Portrait (4:5)": PromptFields(
subject="a 30‑year‑old person with freckles",
environment="sunlit loft by a large window",
timeWeather="golden hour",
camera=CameraSpec(cameraBody="", focalLengthMm=85, aperture="f/1.8", iso=200),
composition="eye‑level half‑body, rule of thirds, shallow DOF, circular bokeh",
lighting="soft window key at 45°, reflector fill, subtle hair rim, 5400K",
microDetails="skin pores, peach fuzz, flyaway hairs, natural imperfections",
motionAtmosphere="gentle breeze, no visible blur",
colorGrade="warm Portra‑like, soft contrast, high dynamic range",
realismCues=True,
aspectRatio="4:5",
negatives=NEGATIVE_BASELINE
),
# Studio product shot of a beer bottle.
"Product beverage (3:2)": PromptFields(
subject="a cold amber beer bottle",
environment="on a wet slate surface with ice, studio",
timeWeather="controlled studio",
camera=CameraSpec(cameraBody="", focalLengthMm=50, aperture="f/4.0", iso=100),
composition="low angle, hero shot, product centered",
lighting="large softbox key from 45°, strip rim from behind, black flags, 5000K",
microDetails="condensation droplets, micro‑scratches on glass, label fibers, subtle fingerprints",
motionAtmosphere="no motion, crisp detail",
colorGrade="clean neutrals, high micro‑contrast",
realismCues=True,
aspectRatio="3:2",
negatives=NEGATIVE_BASELINE
),
# Straight-on exterior of a house under overcast light.
"Architecture (16:9)": PromptFields(
subject="a modern concrete house facade",
environment="suburban street",
timeWeather="overcast day",
camera=CameraSpec(cameraBody="", focalLengthMm=24, aperture="f/8", iso=100),
composition="straight‑on elevation, leading lines, no keystone distortion",
lighting="soft diffuse skylight, no harsh shadows, 6000K",
microDetails="concrete texture, subtle stains, window reflections",
motionAtmosphere="static scene, no motion blur, crisp detail",
colorGrade="neutral, low contrast, high DR",
realismCues=True,
aspectRatio="16:9",
negatives=NEGATIVE_BASELINE
),
# Rainy neon-lit street at night, high ISO.
"Night street (3:2)": PromptFields(
subject="a rainy night city street with neon signs",
environment="downtown alley",
timeWeather="night, light rain",
camera=CameraSpec(cameraBody="", focalLengthMm=35, aperture="f/1.8", iso=1600),
composition="eye‑level, leading lines, reflections on wet pavement",
lighting="neon signs as key, practicals for fill, 3200–4500K mix, glows and halos",
microDetails="raindrops, puddle ripples, specular reflections, wet textures",
motionAtmosphere="light motion blur on pedestrians, volumetric haze",
colorGrade="cinematic teal‑magenta, gentle contrast",
realismCues=True,
aspectRatio="3:2",
negatives=NEGATIVE_BASELINE
)
}
def load_preset(name: str):
    """Return the form values for the named preset, one per Build-tab input.

    The result has 24 entries, in the order of the components wired as
    outputs of the "Load preset" button. For an unknown or unselected preset,
    24 no-op updates are returned so the form is left untouched.
    """
    f = PRESETS.get(name)
    if not f:
        # Must equal the number of wired output components (24); returning
        # fewer makes Gradio reject the handler's result.
        return [gr.update()] * 24
    return (
        f.subject, f.environment, f.timeWeather,
        f.camera.cameraBody, f.camera.focalLengthMm or "",
        f.camera.aperture, f.camera.iso or "",
        f.composition, f.lighting, f.microDetails, f.motionAtmosphere, f.colorGrade,
        f.realismCues, f.aspectRatio, f.negatives,
        f.settings_mj_s, f.settings_mj_chaos, f.settings_mj_seed,
        f.settings_sdxl_steps, f.settings_sdxl_cfg, f.settings_sdxl_sampler, f.settings_sdxl_resolution, f.settings_sdxl_refiner,
        f.settings_dalle_resolution
    )
# ---------- UI ----------
# NOTE(review): the nesting below is reconstructed from the statement order;
# confirm the placement of components against the intended layout.
with gr.Blocks(title=APP_TITLE) as demo:
    gr.Markdown(
        f"# {APP_TITLE}\n"
        "Create model‑ready, ultra‑realistic photo prompts. Reverse‑prompt from an image if you like. "
        "Free, no API keys.\n\n"
        "Note: Reverse analysis avoids identifying real people; it only describes general "
        "appearance/lighting/style."
    )

    with gr.Tab("Build"):
        with gr.Row():
            # Left column: the input form.
            with gr.Column(scale=1):
                preset = gr.Dropdown(choices=list(PRESETS.keys()), label="Presets")
                load_btn = gr.Button("Load preset")
                subject = gr.Textbox(label="Subject", placeholder="e.g., a person running")
                environment = gr.Textbox(label="Environment/Setting", placeholder="e.g., sunlit loft by a large window")
                timeWeather = gr.Textbox(label="Time & Weather", placeholder="e.g., golden hour")
                with gr.Accordion("Camera", open=False):
                    cameraBody = gr.Textbox(label="Camera body (optional)", placeholder="e.g., Canon R5")
                    focalLengthMm = gr.Textbox(label="Focal length (mm)", placeholder="e.g., 85")
                    aperture = gr.Textbox(label="Aperture", placeholder="e.g., f/1.8")
                    iso = gr.Textbox(label="ISO", placeholder="e.g., 200")
                composition = gr.Textbox(label="Composition & Perspective", placeholder="e.g., eye‑level, shallow DOF, rule of thirds")
                lighting = gr.Textbox(label="Lighting", placeholder="e.g., soft window key at 45°, reflector fill, rim, 5400K")
                microDetails = gr.Textbox(label="Materials & Micro‑detail", placeholder="e.g., skin pores, fabric weave, subtle scratches")
                motionAtmosphere = gr.Textbox(label="Motion/Atmosphere", placeholder="e.g., slight motion blur, volumetric light, haze")
                colorGrade = gr.Textbox(label="Color & Grade", placeholder="e.g., warm Portra‑like, soft contrast, high DR")
                realismCues = gr.Checkbox(value=True, label="Include realism cues")
                aspectRatio = gr.Textbox(label="Aspect ratio", value="4:5", placeholder="e.g., 4:5, 3:2, 16:9")
                negatives = gr.Textbox(label="Negative prompt", value=NEGATIVE_BASELINE)
                with gr.Accordion("Model settings", open=False):
                    mj_s = gr.Slider(1, 1000, value=100, step=1, label="Midjourney --s")
                    mj_chaos = gr.Slider(0, 100, value=5, step=1, label="Midjourney --chaos")
                    mj_seed = gr.Slider(0, 999999, value=42, step=1, label="Midjourney --seed")
                    sdxl_steps = gr.Slider(10, 100, value=34, step=1, label="SDXL steps")
                    sdxl_cfg = gr.Slider(1, 20, value=5, step=1, label="SDXL CFG")
                    sdxl_sampler = gr.Textbox(label="SDXL sampler", value="DPM++ SDE Karras")
                    sdxl_resolution = gr.Textbox(label="SDXL resolution", value="1024x1280")
                    sdxl_refiner = gr.Slider(0.0, 1.0, value=0.25, step=0.05, label="SDXL refiner denoise")
                    dalle_resolution = gr.Textbox(label="DALL·E resolution", value="1024x1024")
                gen_btn = gr.Button("Generate prompts")
            # Right column: generated prompts.
            with gr.Column(scale=1):
                universal_out = gr.Textbox(label="Universal prompt", lines=6)
                mj_out = gr.Textbox(label="Midjourney prompt", lines=6)
                sdxl_out = gr.JSON(label="SDXL prompt (positive, negative, settings)")
                dalle_out = gr.JSON(label="DALL·E 3 prompt")
                neg_out = gr.Textbox(label="Negative prompt (for SDXL)", value=NEGATIVE_BASELINE)

        # The same 24 components, in the same order, receive a loaded preset
        # and feed compose(); the order must match both functions.
        form_fields = (
            subject, environment, timeWeather,
            cameraBody, focalLengthMm, aperture, iso,
            composition, lighting, microDetails, motionAtmosphere, colorGrade,
            realismCues, aspectRatio, negatives,
            mj_s, mj_chaos, mj_seed,
            sdxl_steps, sdxl_cfg, sdxl_sampler, sdxl_resolution, sdxl_refiner,
            dalle_resolution,
        )
        load_btn.click(load_preset, inputs=[preset], outputs=list(form_fields))
        gen_btn.click(
            compose,
            inputs=list(form_fields),
            outputs=[universal_out, mj_out, sdxl_out, dalle_out, neg_out],
        )

    with gr.Tab("Reverse (Image → Prompt)"):
        gr.Markdown("Upload an image. The app will infer fields without identifying real people, then build prompts. Use the detected-subject dropdown to set the main subject.")
        image_in = gr.Image(type="pil", label="Upload image")
        analyze_btn = gr.Button("Analyze & Generate")
        subject_pick = gr.Dropdown(label="Detected subjects (pick one)", choices=[], value=None)
        fields_out = gr.JSON(label="Extracted fields (editable in Build tab if needed)")
        universal_out_r = gr.Textbox(label="Universal prompt", lines=6)
        mj_out_r = gr.Textbox(label="Midjourney prompt", lines=6)
        sdxl_out_r = gr.JSON(label="SDXL prompt (positive, negative, settings)")
        dalle_out_r = gr.JSON(label="DALL·E 3 prompt")
        neg_out_r = gr.Textbox(label="Negative prompt (for SDXL)", value=NEGATIVE_BASELINE)

        reverse_outputs = [
            fields_out, universal_out_r, mj_out_r, sdxl_out_r, dalle_out_r, neg_out_r, subject_pick,
        ]
        analyze_btn.click(reverse_prompt, inputs=[image_in], outputs=reverse_outputs)

        def use_picked_subject(picked):
            """Copy the chosen detected subject into the Build tab's Subject box."""
            return picked or ""

        subject_pick.change(use_picked_subject, inputs=[subject_pick], outputs=[subject])

    gr.Markdown(
        "Tips\n"
        "- For Midjourney, prepend 1–2 reference image URLs; keep --style raw.\n"
        "- For SDXL, use Refiner at 0.2–0.4 and upscale 1.5–2.0x for micro‑detail.\n"
        "- DALL·E 3 responds best to concise photographic prose with lens + lighting."
    )

# Start the Gradio server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()