trilochan committed
Commit 5722a4c · verified · 1 Parent(s): fced434

Update app.py

Files changed (1)
app.py +134 -97
app.py CHANGED
@@ -1,8 +1,7 @@
 import os
 import re
-import math
-from dataclasses import dataclass, asdict
-from typing import Dict, Any, Optional, Tuple
+from dataclasses import dataclass
+from typing import Dict, Any, Optional, Tuple, List

 import gradio as gr
 from PIL import Image, ImageStat
@@ -27,19 +26,6 @@ NEGATIVE_BASELINE = (
     "deformed hands, blurry, depth map artifacts, harsh HDR, unrealistic colors"
 )

-DEFAULT_SDXL_SETTINGS = {
-    "steps": 34,
-    "cfg": 5,
-    "sampler": "DPM++ SDE Karras",
-    "resolution": "1024 on long side",
-    "refiner": 0.25,
-    "hires": "1.5–2.0x upscale for micro‑detail"
-}
-
-# Lightweight captioner (free CPU). If unavailable, reverse will degrade gracefully.
-CAPTION_MODEL = "Salesforce/blip-image-captioning-base"  # CPU-friendly
-
-
 @dataclass
 class CameraSpec:
     cameraBody: str = ""
@@ -47,7 +33,6 @@ class CameraSpec:
     aperture: str = ""
     iso: Optional[int] = None

-
 @dataclass
 class PromptFields:
     subject: str = ""
@@ -63,17 +48,19 @@ class PromptFields:
     aspectRatio: str = "4:5"
     negatives: str = NEGATIVE_BASELINE
     model: str = "sdxl"  # "mj" | "sdxl" | "dalle"
+    # MJ
     settings_mj_s: int = 100
     settings_mj_chaos: int = 5
     settings_mj_seed: int = 42
+    # SDXL
     settings_sdxl_steps: int = 34
     settings_sdxl_cfg: int = 5
     settings_sdxl_sampler: str = "DPM++ SDE Karras"
     settings_sdxl_resolution: str = "1024x1280"
     settings_sdxl_refiner: float = 0.25
+    # DALL·E
     settings_dalle_resolution: str = "1024x1024"

-
 def realism_string(enabled: bool) -> str:
     if not enabled:
         return ""
@@ -83,26 +70,17 @@ def realism_string(enabled: bool) -> str:
         "subtle chromatic aberration, vignette."
     )

-
-def safe_join(parts):
+def safe_join(parts: List[str]) -> str:
     return " ".join([p.strip() for p in parts if p and str(p).strip()]).replace("  ", " ").strip()

-
 def build_universal(f: PromptFields) -> str:
-    # First sentence
-    s1 = ""
-    if f.subject:
-        s1 = f"Photo of {f.subject}"
-    else:
-        s1 = "Photo"
-
+    s1 = f"Photo of {f.subject}" if f.subject else "Photo"
     if f.environment:
         s1 += f" in/at {f.environment}"
     if f.timeWeather:
         s1 += f", {f.timeWeather}"
     s1 += "."

-    # Camera
     cam_bits = []
     if f.camera and f.camera.focalLengthMm:
         cam_bits.append(f"{f.camera.focalLengthMm}mm lens")
@@ -110,10 +88,7 @@ def build_universal(f: PromptFields) -> str:
         cam_bits.append(f"at {f.camera.aperture}")
     if f.camera and f.camera.iso:
         cam_bits.append(f"ISO {f.camera.iso}")
-    if cam_bits:
-        s2 = "Shot with a " + ", ".join(cam_bits) + "."
-    else:
-        s2 = ""
+    s2 = "Shot with a " + ", ".join(cam_bits) + "." if cam_bits else ""

     s3 = f"{f.composition}." if f.composition else ""
     s4 = f"Lighting: {f.lighting}." if f.lighting else ""
@@ -122,14 +97,11 @@ def build_universal(f: PromptFields) -> str:
     s7 = f"Color & grade: {f.colorGrade}." if f.colorGrade else ""
     s8 = realism_string(f.realismCues)

-    universal = safe_join([s1, s2, s3, s4, s5, s6, s7, s8])
-    return universal
-
+    return safe_join([s1, s2, s3, s4, s5, s6, s7, s8])

 def format_midjourney(universal: str, f: PromptFields) -> str:
     return f"{universal} --style raw --ar {f.aspectRatio} --s {f.settings_mj_s} --chaos {f.settings_mj_chaos} --seed {f.settings_mj_seed}"

-
 def format_sdxl(universal: str, f: PromptFields) -> Dict[str, Any]:
     return {
         "positive": universal,
@@ -144,12 +116,10 @@ def format_sdxl(universal: str, f: PromptFields) -> Dict[str, Any]:
         }
     }

-
 def format_dalle(universal: str, f: PromptFields) -> Dict[str, Any]:
     prose = f"A high‑resolution photograph. {universal}"
     return {"prompt": prose, "resolution": f.settings_dalle_resolution}

-
 def compose(
     subject, environment, timeWeather,
     cameraBody, focalLengthMm, aperture, iso,
@@ -187,26 +157,33 @@ def compose(
         settings_sdxl_refiner=float(sdxl_refiner) if str(sdxl_refiner).strip() else 0.25,
         settings_dalle_resolution=dalle_resolution or "1024x1024",
     )
-
     universal = build_universal(f)
     mj = format_midjourney(universal, f)
     sdxl = format_sdxl(universal, f)
     dalle = format_dalle(universal, f)
     return universal, mj, sdxl, dalle, (f.negatives or NEGATIVE_BASELINE)

-
 # ---------- Reverse prompt helpers ----------

+CAPTIONER = None
 def init_captioner():
     if not HAS_TRANSFORMERS:
         return None
     try:
-        return pipeline("image-to-text", model=CAPTION_MODEL)
+        return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
     except Exception:
         return None
-
 CAPTIONER = init_captioner()

+OBJDET = None
+def init_objdet():
+    if not HAS_TRANSFORMERS:
+        return None
+    try:
+        return pipeline("object-detection", model="facebook/detr-resnet-50")
+    except Exception:
+        return None
+OBJDET = init_objdet()

 def download_haarcascade() -> Optional[str]:
     if cv2 is None:
@@ -225,7 +202,6 @@ def download_haarcascade() -> Optional[str]:
     except Exception:
         return None

-
 def detect_faces(pil_img: Image.Image) -> int:
     if cv2 is None:
         return 0
@@ -241,28 +217,75 @@ def detect_faces(pil_img: Image.Image) -> int:
     except Exception:
         return 0

-
 def avg_brightness(pil_img: Image.Image) -> float:
     stat = ImageStat.Stat(pil_img.convert("L"))
     return float(stat.mean[0])

-
 def nearest_aspect(w: int, h: int) -> str:
     target = w / h
-    candidates = {
-        "1:1": 1.0,
-        "4:5": 0.8,
-        "5:4": 1.25,
-        "4:3": 1.333,
-        "3:2": 1.5,
-        "16:9": 1.777
-    }
-    best = min(candidates.items(), key=lambda kv: abs(kv[1] - target))[0]
-    return best
-
+    candidates = { "1:1": 1.0, "4:5": 0.8, "5:4": 1.25, "4:3": 1.333, "3:2": 1.5, "16:9": 1.777 }
+    return min(candidates.items(), key=lambda kv: abs(kv[1] - target))[0]
+
+def _article(word: str) -> str:
+    return "an" if word and word[0].lower() in "aeiou" else "a"
+
+def _label_to_phrase(label: str) -> str:
+    nice = {"tv": "television", "cell phone": "phone", "sports ball": "ball", "potted plant": "potted plant"}
+    word = nice.get(label, label)
+    return f"{_article(word)} {word}"
+
+def _centrality_score(cx, cy, W, H):
+    dx = abs(cx - W/2) / (W/2)
+    dy = abs(cy - H/2) / (H/2)
+    dist = min(1.0, (dx*dx + dy*dy) ** 0.5)
+    return 1.0 - dist
+
+def _detect_main_subject(img: Image.Image):
+    if OBJDET is None:
+        return None, []
+    try:
+        dets = OBJDET(img)
+    except Exception:
+        return None, []
+    if not dets:
+        return None, []
+
+    W, H = img.size
+    scored = []
+    for d in dets:
+        box = d.get("box", {})
+        xmin, ymin = box.get("xmin", 0), box.get("ymin", 0)
+        xmax, ymax = box.get("xmax", 0), box.get("ymax", 0)
+        w, h = max(1, xmax - xmin), max(1, ymax - ymin)
+        area = (w * h) / float(W * H)
+        cx, cy = xmin + w/2, ymin + h/2
+        central = _centrality_score(cx, cy, W, H)
+        conf = float(d.get("score", 0.0))
+        label = d.get("label", "")
+        score = conf * (0.6 * area + 0.4 * central)
+        scored.append({"label": label, "score": score})
+
+    scored.sort(key=lambda x: x["score"], reverse=True)
+    main_phrase = _label_to_phrase(scored[0]["label"])
+
+    suggestions, seen = [], set()
+    for s in scored:
+        p = _label_to_phrase(s["label"])
+        if p not in seen:
+            suggestions.append(p)
+            seen.add(p)
+        if len(suggestions) >= 5:
+            break
+    return main_phrase, suggestions
+
+def _action_from_caption(caption: str) -> str:
+    c = (caption or "").lower()
+    for key in ["running", "sprinting", "walking", "standing", "jumping", "riding", "driving", "sitting"]:
+        if key in c:
+            return key
+    return ""

 def extract_fields_from_image(img: Image.Image) -> Dict[str, Any]:
-    # Caption
     caption = ""
     if CAPTIONER:
         try:
@@ -271,38 +294,55 @@ def extract_fields_from_image(img: Image.Image) -> Dict[str, Any]:
             caption = out[0].get("generated_text", "")
         except Exception:
             caption = ""
-    # Brightness -> time
+
     brightness = avg_brightness(img)
-    timeWeather = "golden hour" if 130 <= brightness <= 170 else ("daylight" if brightness > 140 else "night with ambient light")
+    if brightness > 140:
+        timeWeather = "daylight"
+    elif 100 < brightness <= 140:
+        timeWeather = "overcast daylight"
+    else:
+        timeWeather = "night with ambient light"
+
+    subject_phrase, subject_suggestions = _detect_main_subject(img)

-    # Faces -> portrait heuristics
     faces = detect_faces(img)
-    if faces > 0:
-        subject = "a person (no identity) " + (f"— {caption}" if caption else "")
-        composition = "eye‑level portrait, head-and-shoulders, shallow DOF, rule of thirds"
-        focal = 85
-        aperture = "f/1.8"
+    if not subject_phrase and faces > 0:
+        subject_phrase = "a person"
+
+    if not subject_phrase:
+        m = re.search(r"(a|an|the)\s+([^,.]+?)(?:\s+(on|in|at|by|with|near|amid|from)\b|[.,]|$)", (caption or "").lower())
+        subject_phrase = m.group(0).rstrip(",.") if m else ("a person" if faces > 0 else "a real-world subject")
+
+    if subject_phrase.startswith(("a person", "an person")):
+        act = _action_from_caption(caption)
+        if act and act not in subject_phrase:
+            subject_phrase = f"{subject_phrase} {act}"
+
+    if subject_phrase.startswith(("a person", "an person")):
+        focal = 35
+        aperture = "f/2.8"
         iso = 200 if "day" in timeWeather else 800
-        color_grade = "warm Portra‑like, soft contrast, high dynamic range"
-        lighting = "soft key at 45°, gentle reflector fill, subtle rim; ~5400K" if "day" in timeWeather else "soft practicals, dim ambient, ~3200–4000K"
-        micro = "skin pores, peach fuzz, flyaway hairs, natural imperfections"
-        motion = "no visible motion blur, creamy circular bokeh"
+        composition = "eye‑level, rear three‑quarter or profile, leading lines, shallow DOF"
+        lighting = "soft natural light" if "day" in timeWeather else "mixed ambient light with practicals, soft shadows"
+        micro = "skin pores, fabric textures, scuffs, dust in the air"
+        motion = "slight motion blur on limbs if running" if "running" in subject_phrase else "no visible motion blur"
+        color_grade = "neutral, true-to-life colors, gentle contrast, high micro‑contrast"
     else:
-        subject = caption or "a real-world scene"
-        composition = "eye‑level, balanced framing, leading lines, shallow DOF"
         focal = 35
         aperture = "f/2.8"
         iso = 200 if "day" in timeWeather else 800
-        color_grade = "neutral, true-to-life colors, gentle contrast, high micro-contrast"
-        lighting = "soft natural light, mild shadows" if "day" in timeWeather else "mixed ambient light with practicals, soft shadows"
+        composition = "eye‑level, balanced framing, leading lines, shallow DOF"
+        lighting = "soft natural light" if "day" in timeWeather else "mixed ambient light with practicals, soft shadows"
         micro = "texture of materials, dust, subtle scratches, specular highlights"
         motion = "slight motion blur if present, volumetric light if applicable"
+        color_grade = "neutral, true-to-life colors, gentle contrast, high micro‑contrast"

     w, h = img.size
     aspect = nearest_aspect(w, h)

-    fields = {
-        "subject": subject,
+    return {
+        "subject": subject_phrase,
+        "subjectCandidates": subject_suggestions,
         "environment": "",
         "timeWeather": timeWeather,
         "camera": {
@@ -321,15 +361,12 @@ def extract_fields_from_image(img: Image.Image) -> Dict[str, Any]:
         "negatives": NEGATIVE_BASELINE,
         "model": "sdxl"
     }
-    return fields
-

 def reverse_prompt(image: Image.Image):
     if image is None:
-        return gr.update(value=""), "", {"positive": "", "negative": "", "settings": {}}, {"prompt": "", "resolution": ""}, NEGATIVE_BASELINE
+        return {}, "", "", {"positive": "", "negative": "", "settings": {}}, {"prompt": "", "resolution": ""}, NEGATIVE_BASELINE, gr.update(choices=[], value=None)

     fields = extract_fields_from_image(image)
-    # Build objects
     f = PromptFields(
         subject=fields["subject"],
         environment=fields.get("environment", ""),
@@ -353,8 +390,10 @@ def reverse_prompt(image: Image.Image):
     mj = format_midjourney(universal, f)
     sdxl = format_sdxl(universal, f)
     dalle = format_dalle(universal, f)
-    return fields, universal, mj, sdxl, dalle, (fields.get("negatives") or NEGATIVE_BASELINE)

+    cands = fields.get("subjectCandidates", []) or []
+    dd = gr.update(choices=cands, value=(cands[0] if cands else None))
+    return fields, universal, mj, sdxl, dalle, (fields.get("negatives") or NEGATIVE_BASELINE), dd

 # ---------- Presets ----------

@@ -417,7 +456,6 @@ PRESETS = {
     )
 }

-
 def load_preset(name: str):
     f = PRESETS.get(name)
     if not f:
@@ -433,7 +471,6 @@ def load_preset(name: str):
         f.settings_dalle_resolution
     )

-
 # ---------- UI ----------

 with gr.Blocks(title=APP_TITLE) as demo:
@@ -445,7 +482,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
     preset = gr.Dropdown(choices=list(PRESETS.keys()), label="Presets")
     load_btn = gr.Button("Load preset")

-    subject = gr.Textbox(label="Subject", placeholder="e.g., a 30‑year‑old person with freckles")
+    subject = gr.Textbox(label="Subject", placeholder="e.g., a person running")
    environment = gr.Textbox(label="Environment/Setting", placeholder="e.g., sunlit loft by a large window")
    timeWeather = gr.Textbox(label="Time & Weather", placeholder="e.g., golden hour")

@@ -455,8 +492,8 @@ with gr.Blocks(title=APP_TITLE) as demo:
     aperture = gr.Textbox(label="Aperture", placeholder="e.g., f/1.8")
     iso = gr.Textbox(label="ISO", placeholder="e.g., 200")

-    composition = gr.Textbox(label="Composition & Perspective", placeholder="e.g., eye‑level half‑body, rule of thirds, shallow DOF")
-    lighting = gr.Textbox(label="Lighting", placeholder="e.g., soft window key at 45°, reflector fill, subtle rim, 5400K")
+    composition = gr.Textbox(label="Composition & Perspective", placeholder="e.g., eye‑level, shallow DOF, rule of thirds")
+    lighting = gr.Textbox(label="Lighting", placeholder="e.g., soft window key at 45°, reflector fill, rim, 5400K")
     microDetails = gr.Textbox(label="Materials & Micro‑detail", placeholder="e.g., skin pores, fabric weave, subtle scratches")
     motionAtmosphere = gr.Textbox(label="Motion/Atmosphere", placeholder="e.g., slight motion blur, volumetric light, haze")
     colorGrade = gr.Textbox(label="Color & Grade", placeholder="e.g., warm Portra‑like, soft contrast, high DR")
@@ -512,9 +549,10 @@ with gr.Blocks(title=APP_TITLE) as demo:
     )

     with gr.Tab("Reverse (Image → Prompt)"):
-        gr.Markdown("Upload an image. The app will infer fields without identifying real people, then build prompts.")
+        gr.Markdown("Upload an image. The app will infer fields without identifying real people, then build prompts. Use the detected-subject dropdown to set the main subject.")
         image_in = gr.Image(type="pil", label="Upload image")
         analyze_btn = gr.Button("Analyze & Generate")
+        subject_pick = gr.Dropdown(label="Detected subjects (pick one)", choices=[], value=None)
         fields_out = gr.JSON(label="Extracted fields (editable in Build tab if needed)")
         universal_out_r = gr.Textbox(label="Universal prompt", lines=6)
         mj_out_r = gr.Textbox(label="Midjourney prompt", lines=6)
@@ -525,21 +563,20 @@ with gr.Blocks(title=APP_TITLE) as demo:
         analyze_btn.click(
             reverse_prompt,
             inputs=[image_in],
-            outputs=[fields_out, universal_out_r, mj_out_r, sdxl_out_r, dalle_out_r, neg_out_r]
+            outputs=[fields_out, universal_out_r, mj_out_r, sdxl_out_r, dalle_out_r, neg_out_r, subject_pick]
         )

+        def use_picked_subject(picked):
+            return picked or ""
+
+        subject_pick.change(use_picked_subject, inputs=[subject_pick], outputs=[subject])
+
     gr.Markdown(
         "Tips\n"
-        "- For Midjourney, prepend 1–2 reference image URLs to match look/lighting; keep --style raw.\n"
+        "- For Midjourney, prepend 1–2 reference image URLs; keep --style raw.\n"
         "- For SDXL, use Refiner at 0.2–0.4 and upscale 1.5–2.0x for micro‑detail.\n"
-        "- DALL·E 3 responds best to concise, photographic prose with lens + lighting."
+        "- DALL·E 3 responds best to concise photographic prose with lens + lighting."
     )

 if __name__ == "__main__":
-    demo.launch()
-gradio>=4.40.0
-pillow
-numpy
-transformers>=4.42.0
-torch>=2.3.0
-opencv-python-headless
+    demo.launch()
 
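Note on the new ranking heuristic: _detect_main_subject scores every DETR detection as confidence x (0.6 x box-area fraction + 0.4 x centrality) and keeps up to five distinct labels as suggestions. Below is a minimal standalone sketch of that ranking, fed mock detections in the same label/score/box shape the transformers object-detection pipeline returns, so the weighting can be sanity-checked without downloading the model; the frame size and boxes are hypothetical.

# Standalone sketch of the commit's subject-ranking heuristic.
def centrality(cx, cy, W, H):
    dx = abs(cx - W / 2) / (W / 2)
    dy = abs(cy - H / 2) / (H / 2)
    return 1.0 - min(1.0, (dx * dx + dy * dy) ** 0.5)

def rank_subjects(dets, W, H):
    scored = []
    for d in dets:
        b = d["box"]
        w, h = max(1, b["xmax"] - b["xmin"]), max(1, b["ymax"] - b["ymin"])
        area = (w * h) / float(W * H)
        cen = centrality(b["xmin"] + w / 2, b["ymin"] + h / 2, W, H)
        # Same weighting as _detect_main_subject: size counts more than position.
        scored.append((d["score"] * (0.6 * area + 0.4 * cen), d["label"]))
    return [label for _, label in sorted(scored, reverse=True)]

# Hypothetical detections for a 1024x768 frame.
mock = [
    {"label": "person", "score": 0.98, "box": {"xmin": 380, "ymin": 120, "xmax": 640, "ymax": 700}},
    {"label": "dog", "score": 0.91, "box": {"xmin": 40, "ymin": 560, "xmax": 180, "ymax": 700}},
]
print(rank_subjects(mock, 1024, 768))  # ['person', 'dog']: the large, central box wins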
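With this commit, reverse_prompt returns seven values instead of six: the extracted fields, the universal prompt, the Midjourney string, the SDXL and DALL·E dicts, the negatives, and a gr.update that populates the new subject_pick dropdown. A hedged sketch of driving it headlessly, assuming the module imports as app (the file is app.py) and noting that sample.jpg is a hypothetical test image; importing the module may download the BLIP/DETR weights, or fall back to None if transformers is unavailable.

from PIL import Image
import app  # importing builds the Blocks UI (no launch) and may fetch BLIP/DETR weights

img = Image.open("sample.jpg")  # hypothetical local test image
fields, universal, mj, sdxl, dalle, negatives, dd = app.reverse_prompt(img)
print(fields["subject"], fields.get("subjectCandidates", []))
print(mj)  # universal prompt plus --style raw / --ar / --s / --chaos / --seed flags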