| from __future__ import annotations |
|
|
| from typing import Any, Dict, List |
|
|
|
|
| def _norm_list(x: Any) -> List[str]: |
| if not x: |
| return [] |
| if isinstance(x, list): |
| return [str(v).strip() for v in x if str(v).strip()] |
| return [str(x).strip()] |
|
|
|
|
| def _join(items: List[str], sep: str = ", ") -> str: |
| items = [i.strip() for i in items if i and i.strip()] |
| return sep.join(items) |
|
|
|
|
| def _sent(items: List[str]) -> str: |
| """Sentence-ish join. Keeps it readable.""" |
| items = [i.strip() for i in items if i and i.strip()] |
| if not items: |
| return "" |
| if len(items) == 1: |
| return items[0] |
| return "; ".join(items) |
|
|
|
|
def _as_text(value: Any) -> str:
    """Return *value* as stripped text; any falsy value (None, "", 0) gives ""."""
    return str(value).strip() if value else ""


def _section_list(section: Any, key: str) -> List[str]:
    """Read ``section[key]`` through ``_norm_list``; a non-dict section gives []."""
    if not isinstance(section, dict):
        return []
    return _norm_list(section.get(key, []))


def plan_to_prompts(plan: Any) -> Dict[str, str]:
    """
    Convert the UnifiedPlanner JSON schema output into STRICT, modality-specific prompts.
    This is the key fix: generators must obey the same semantic contract.

    Every prompt is assembled from the same set of fields extracted once
    from the plan, so the text, image and audio prompts describe one scene.

    Args:
        plan: An object exposing ``model_dump()``, a ``dict``, or anything
            ``dict()`` accepts. Missing, ``None`` or non-dict sections are
            treated as empty.

    Returns:
        {
            "text_prompt": "...",
            "image_prompt": "...",
            "audio_prompt": "...",
            "shared_brief": "..."
        }

    Raises:
        Whatever ``dict(plan)`` raises (typically ``TypeError``) when *plan*
        is neither a dict nor an object with ``model_dump()``.
    """
    if hasattr(plan, "model_dump"):
        data = plan.model_dump()
    elif isinstance(plan, dict):
        data = plan
    else:
        data = dict(plan)

    # _as_text keeps a None field from becoming the literal text "None".
    scene_summary = _as_text(data.get("scene_summary"))
    domain = _as_text(data.get("domain"))

    core_sem = data.get("core_semantics", {})
    style_ctrl = data.get("style_controls", {})
    img_const = data.get("image_constraints", {})
    aud_const = data.get("audio_constraints", {})

    # Entities.
    primary = _section_list(core_sem, "main_subjects")
    secondary = _norm_list(data.get("secondary_entities", []))

    # Visual attributes: style controls first, then image-specific details.
    visual_style = _section_list(style_ctrl, "visual_style")
    visual_attrs = (
        visual_style
        + _section_list(style_ctrl, "color_palette")
        + _section_list(style_ctrl, "lighting")
        + _section_list(img_const, "objects")
        + _section_list(img_const, "environment_details")
    )
    style = visual_style
    mood = _section_list(style_ctrl, "mood_emotion")
    tone = _section_list(style_ctrl, "narrative_tone")

    # Audio elements.
    audio_intent = _section_list(aud_const, "audio_intent")
    sound_sources = _section_list(aud_const, "sound_sources")
    ambience = _section_list(aud_const, "ambience")

    # Hard constraints are read from the image constraints only.
    must_include = _section_list(img_const, "must_include")
    must_avoid = _section_list(img_const, "must_avoid")

    # Scalar scene fields are coerced to text so that a non-string value
    # cannot break the ", ".join / .lower() / "+" calls further down.
    core = core_sem if isinstance(core_sem, dict) else {}
    setting = _as_text(core.get("setting"))
    time_of_day = _as_text(core.get("time_of_day"))
    weather = _as_text(core.get("weather"))
    tempo = _as_text(aud_const.get("tempo")) if isinstance(aud_const, dict) else ""

    # ---- Shared brief ---------------------------------------------------
    brief_parts: List[str] = []
    if scene_summary:
        brief_parts.append(scene_summary)
    if domain:
        brief_parts.append(f"Domain: {domain}.")
    labelled = [
        ("Primary entities", primary),
        ("Secondary entities", secondary),
        ("Visual attributes", visual_attrs),
        ("Style", style),
        ("Mood/emotion", mood),
        ("Narrative tone", tone),
        ("Must include", must_include),
        ("Must avoid", must_avoid),
    ]
    for label, values in labelled:
        if values:
            brief_parts.append(f"{label}: {_join(values)}.")
    shared_brief = " ".join(b.strip() for b in brief_parts if b.strip())

    # ---- Text prompt ----------------------------------------------------
    text_lines: List[str] = [
        "Write a vivid, literal description of the exact scene below.",
        "Do not include instructions, bullets, headings, or meta commentary.",
        "Do not mention 'prompt' or 'plan'.",
        "",
        shared_brief,
        "",
        "Constraints:",
    ]
    if must_include:
        text_lines.append(f"- Include: {_join(must_include)}")
    if must_avoid:
        text_lines.append(f"- Avoid: {_join(must_avoid)}")
    text_lines.append("- Length: 3 to 6 sentences.")
    text_prompt = "\n".join(text_lines).strip()

    # ---- Image prompt: short comma-separated phrases --------------------
    # NOTE(review): visual_style entries can appear twice (inside the first
    # five visual attributes and again as style) — confirm this repetition
    # is intended before de-duplicating.
    img_parts: List[str] = [
        scene_summary,
        _join(primary),
        _join(visual_attrs[:5]),
        _join(style[:2]),
        _join(mood[:2]),
        setting,
        time_of_day,
        weather,
    ]
    image_prompt = ", ".join(part for part in img_parts if part).strip()
    if not image_prompt:
        image_prompt = scene_summary or "scene"

    # ---- Audio prompt ---------------------------------------------------
    aud_parts: List[str] = [scene_summary]
    if sound_sources:
        aud_parts.append("sounds of " + _join(sound_sources[:4]))
    if ambience:
        aud_parts.append("ambient " + _join(ambience[:3]))
    if audio_intent:
        aud_parts.append(_join(audio_intent))
    # "clear" and "sunny" weather adds no weather-sounds phrase.
    if weather and weather.lower() not in ("clear", "sunny"):
        aud_parts.append(weather.lower() + " weather sounds")
    if setting:
        aud_parts.append(setting.lower() + " environment")
    if tempo:
        aud_parts.append(tempo + " tempo")

    audio_prompt = ", ".join(part for part in aud_parts if part).strip()
    if not audio_prompt:
        audio_prompt = scene_summary or "ambient soundscape"
    # Append the suffix only when the prompt does not already end with an
    # audio word; including "soundscape" keeps the fallback from turning
    # into "ambient soundscape soundscape".
    if not audio_prompt.endswith(("sound", "audio", "soundscape")):
        audio_prompt += " soundscape"

    return {
        "text_prompt": text_prompt,
        "image_prompt": image_prompt,
        "audio_prompt": audio_prompt,
        "shared_brief": shared_brief,
    }
|
|
|
|
| |
def plan_to_canonical_text(plan: Any) -> str:
    """
    Legacy entry point kept so existing imports continue to work.

    Builds all prompts via ``plan_to_prompts`` and returns only the
    ``"shared_brief"`` entry.
    """
    prompts = plan_to_prompts(plan)
    brief = prompts["shared_brief"]
    return brief