update models (use gpt and gemini)

app/main.py  CHANGED  (+209 -84)
@@ -15,18 +15,24 @@ from huggingface_hub import HfApi, create_repo, CommitOperationAdd
 from dotenv import load_dotenv
 load_dotenv()
 
-# -------- Gemini
+# -------- Gemini + GPT client setup --------
 from google import genai
 from google.genai import types
 
+try:
+    from openai import OpenAI
+except ImportError:
+    OpenAI = None
+
+# We keep the GEMINI_* env vars for compatibility.
 API_KEY = os.getenv("GEMINI_API_KEY", "")
+MODEL = os.getenv("GEMINI_MODEL", "gemini-2.0-pro")
+SMALL_MODEL = os.getenv("GEMINI_SMALL_MODEL")
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or API_KEY
 PORT = int(os.getenv("PORT", "7860"))
 
+gemini_client = genai.Client(api_key=API_KEY) if API_KEY else None
+gpt_client = OpenAI(api_key=OPENAI_API_KEY) if (OPENAI_API_KEY and OpenAI) else None
 
 # -------- FastAPI app --------
 app = FastAPI(title="Manim Render API (error + visual refine)")
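Note: the configuration block above is driven entirely by environment variables. A minimal sketch of a matching setup, expressed in Python for reference; the variable names come from the diff, the values are placeholders:

import os

# Placeholders only; names match the diff above, values are illustrative.
os.environ.setdefault("GEMINI_API_KEY", "<gemini-key>")        # enables gemini_client
os.environ.setdefault("GEMINI_MODEL", "gemini-2.0-pro")        # primary model default
os.environ.setdefault("GEMINI_SMALL_MODEL", "gpt-4o-mini")     # optional storyboard model override
os.environ.setdefault("OPENAI_API_KEY", "<openai-key>")        # enables gpt_client; falls back to GEMINI_API_KEY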
@@ -83,12 +89,114 @@ class RateLimiter:
 limiter = RateLimiter(10)
 storyboard_limiter = RateLimiter(30)
 
+def _to_chat_content_item(item: Any) -> Any:
+    if isinstance(item, str):
+        return {"type": "text", "text": item}
+    if isinstance(item, dict):
+        return item
+    return {"type": "text", "text": str(item)}
+
+
+def _to_response_content_item(item: Any) -> Dict[str, Any]:
+    if isinstance(item, str):
+        return {"type": "input_text", "text": item}
+    if isinstance(item, dict):
+        itype = item.get("type")
+        if itype == "text":
+            return {"type": "input_text", "text": item.get("text", "")}
+        if itype == "image_url":
+            image_url = item.get("image_url", {})
+            if isinstance(image_url, dict):
+                return {"type": "input_image", "image_url": image_url}
+            return {"type": "input_image", "image_url": {"url": str(image_url)}}
+        if itype in {"input_text", "input_image", "input_file"}:
+            return item
+    return {"type": "input_text", "text": str(item)}
+
+
+def _build_openai_content(contents: Any, *, for_chat: bool) -> Any:
+    """
+    Normalize content payloads for chat (strings or multimodal lists) and responses API (typed blocks).
+    """
+    if isinstance(contents, str):
+        return contents if for_chat else [_to_response_content_item(contents)]
+    if isinstance(contents, (list, tuple)):
+        if for_chat:
+            return [_to_chat_content_item(item) for item in contents]
+        return [_to_response_content_item(item) for item in contents]
+    return contents if for_chat else [_to_response_content_item(contents)]
+
+
+def _build_chat_messages(system: str, contents: Any) -> List[Dict[str, Any]]:
+    return [
+        {"role": "system", "content": system},
+        {"role": "user", "content": _build_openai_content(contents, for_chat=True)},
+    ]
+
+
+def _build_responses_input(system: str, contents: Any) -> List[Dict[str, Any]]:
+    return [
+        {"role": "system", "content": _build_openai_content(system, for_chat=False)},
+        {"role": "user", "content": _build_openai_content(contents, for_chat=False)},
+    ]
+
+
+def _extract_chat_content(resp: Any) -> str:
+    content = resp.choices[0].message.content
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        text_parts = []
+        for chunk in content:
+            if isinstance(chunk, dict) and chunk.get("type") == "text":
+                text_parts.append(chunk.get("text", ""))
+            else:
+                text_parts.append(str(chunk))
+        return "\n".join(filter(None, text_parts))
+    return str(content)
+
+
+def _extract_responses_content(resp: Any) -> str:
+    text = getattr(resp, "output_text", None)
+    if text:
+        return text
+    output = getattr(resp, "output", None)
+    if output:
+        chunks = []
+        for item in output:
+            for elem in getattr(item, "content", []) or []:
+                chunk_text = getattr(elem, "text", None) or getattr(elem, "content", None)
+                if chunk_text:
+                    chunks.append(chunk_text)
+        if chunks:
+            return "\n".join(map(str, chunks))
+    return str(resp)
+
+
+def _invoke_gpt_model(model: str, system: str, contents: Any) -> str:
+    if not gpt_client:
+        raise RuntimeError("GPT client is not configured")
+    messages = _build_chat_messages(system, contents)
+    try:
+        resp = gpt_client.chat.completions.create(model=model, messages=messages)
+        return _extract_chat_content(resp)
+    except Exception as err:
+        message = str(err)
+        if "only supported in v1/responses" not in message:
+            raise
+        resp = gpt_client.responses.create(
+            model=model,
+            input=_build_responses_input(system, contents),
+        )
+        return _extract_responses_content(resp)
+
+
 def gemini_call(*, system: str, contents):
     """Wrapper to: enforce RPM and standardize text extraction."""
-    if not
+    if not gemini_client:
         raise RuntimeError("Gemini client is not configured")
     limiter.acquire()
-    resp =
+    resp = gemini_client.models.generate_content(
         model=MODEL,
         config=types.GenerateContentConfig(system_instruction=system),
         contents=contents,
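Note: _invoke_gpt_model tries the Chat Completions API first and falls back to the Responses API only when the error message says the model is responses-only. A hypothetical call showing the payload shapes the helpers above produce (not part of the diff):

# Hypothetical usage; the image item follows the shapes handled by the helpers above.
text = _invoke_gpt_model(
    "gpt-4o-mini",
    system="You are a storyboard assistant.",
    contents=[
        "Critique this frame.",
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
    ],
)
# On the chat path, _build_chat_messages yields:
#   [{"role": "system", "content": "You are a storyboard assistant."},
#    {"role": "user", "content": [{"type": "text", ...}, {"type": "image_url", ...}]}]
# On fallback, _build_responses_input converts the same items into
#   {"type": "input_text", ...} / {"type": "input_image", ...} blocks.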
@@ -98,16 +206,9 @@ def gemini_call(*, system: str, contents):
 
 def gemini_small_call(*, system: str, contents: str) -> str:
     """Lightweight wrapper for the storyboard assistant (smaller model)."""
-
-        raise RuntimeError("Gemini client is not configured")
-    target_model = SMALL_MODEL or MODEL
+    target_model = SMALL_MODEL or "gpt-4o-mini"
     storyboard_limiter.acquire()
-
-        model=target_model,
-        config=types.GenerateContentConfig(system_instruction=system),
-        contents=contents,
-    )
-    return getattr(resp, "text", str(resp))
+    return _invoke_gpt_model(target_model, system, contents)
 
 # ---------------- prompts ----------------
 SYSTEM_PROMPT = """You are a Manim CE (0.19.x) code generator/refiner.
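Note: despite its name, gemini_small_call now routes the storyboard model through the GPT client while keeping its own rate limiter. A hypothetical call (the prompt text is illustrative):

# Hypothetical usage of the rewritten wrapper.
reply = gemini_small_call(
    system="You are a storyboard assistant.",
    contents="Tighten scene 2 to under 8 seconds.",
)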
@@ -128,7 +229,8 @@ Forbidden: os, subprocess, sys, requests, pathlib, socket, shutil, psutil, any f
 # Common Manim CE 0.19 API constraints (must follow)
 - Do NOT use `vertex=` with RightAngle(...). Choose the corner by line ordering or set quadrant=(±1, ±1).
 - Do NOT call `.to_center()` (not a valid method). Use `.center()` or `.move_to(ORIGIN)`.
-- Prefer `.move_to()`, `.align_to()`, `.to_edge()`, `.scale()`, `.next_to()` for layout/placement.
+- Prefer `.move_to()`, `.align_to()`, `.to_edge()`, `.scale()`, `.next_to()` for layout/placement, keeping generous spacing (buff ≥ 0.6) so nothing overlaps.
+- Only introduce objects that directly support the user's request. Avoid decorative or redundant elements that clutter the scene.
 """
 
 DEFAULT_SCENE = """from manim import *
@@ -697,14 +799,14 @@ def _run_manim(scene_code: str, run_id: Optional[str] = None, quality: str = "me
     return mp4.read_bytes(), png_path
 
 def _upload_image_to_gemini(png_path: Path):
-    """
-    if not
+    """Prepare an inline data URI that the OpenAI vision API accepts."""
+    if not gemini_client or not png_path or not png_path.exists():
         return None
     limiter.acquire()
     with open(png_path, "rb") as f:
-        file_ref =
-            file=f,
-            config={"mime_type": "image/png"}
+        file_ref = gemini_client.files.upload(
+            file=f,
+            config={"mime_type": "image/png"},
         )
     return file_ref
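Note: the new docstring mentions an OpenAI data URI, but the body still uploads through the Gemini Files API. In the google-genai SDK the returned file reference can be passed directly in a generate_content call; a sketch, not part of the diff (the path and prompt are hypothetical):

from pathlib import Path

# Hypothetical downstream use of the returned file_ref.
file_ref = _upload_image_to_gemini(Path("media/iter_err_1/capture.png"))
if file_ref is not None:
    resp = gemini_client.models.generate_content(
        model=MODEL,
        contents=[file_ref, "Point out overlapping or cramped elements."],
    )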
@@ -715,7 +817,7 @@ def llm_generate_manim_code(
     previous_code: Optional[str] = None,
 ) -> str:
     """First-pass generation (capture-aware)."""
-    if not
+    if not gemini_client:
         return DEFAULT_SCENE
     try:
         contents = f"Create AutoScene for: {prompt}\nRemember the CAPTURE POLICY and Common API constraints."
@@ -733,6 +835,8 @@
         resolution = settings.get("resolution")
         if resolution:
             contents += f"\n- Design visuals that read clearly at {resolution}."
+        contents += "\nLayout requirement: ensure every element has clear separation—absolutely no overlaps at the capture point."
+        contents += "\nKeep the composition minimal: only include elements explicitly needed for the prompt."
         response_text = gemini_call(system=SYSTEM_PROMPT, contents=contents)
         code = _clean_code(response_text)
         if "class AutoScene" not in code:
@@ -749,8 +853,8 @@ def llm_refine_from_error(
     original_user_prompt: str,
     settings: Optional[Dict[str, Any]] = None,
 ) -> str:
-    """When Manim fails; send the *real* CLI log/trace to
-    if not
+    """When Manim fails; send the *real* CLI log/trace to the LLM."""
+    if not gemini_client:
         return previous_code or DEFAULT_SCENE
     try:
         trimmed = error_message[-4000:] if error_message else ""
@@ -769,6 +873,8 @@ Requirements:
 - Fix the bug while preserving the math logic and planned animations.
 - Keep exactly one class AutoScene(Scene).
 - Keep the CAPTURE POLICY and ensure # CAPTURE_POINT is at the final steady layout.
+- Eliminate any overlapping elements; maintain clear spacing at the capture point.
+- Remove any objects that are not necessary for the prompt or storyboard; keep the scene concise.
 - Scan for nonexistent methods (e.g., `.to_center`) or invalid kwargs (e.g., `vertex=` on RightAngle) and replace with valid Manim CE 0.19 API.
 - Prefer `.center()`/`.move_to(ORIGIN)`, and `.move_to()`, `.align_to()`, `.to_edge()`, `.next_to()` for layout.
 - Apply the smallest change necessary to resolve the failure; do not overhaul structure, pacing, or stylistic choices the user made.
@@ -811,7 +917,7 @@ def llm_visual_refine_from_image(
     Use the screenshot to request layout/legibility/placement fixes.
     Includes the original prompt and current code, and asks for minimal edits.
     """
-    if not
+    if not gemini_client or not png_path or not png_path.exists():
         return previous_code
     try:
         file_ref = _upload_image_to_gemini(png_path)
@@ -829,6 +935,8 @@ Tasks (optimize for readability and visual quality without changing the math mea
 - Fix layout issues (overlaps, cramped margins, alignment, consistent scaling).
 - Improve text legibility (minimum size ~32 px at 854x480, adequate contrast).
 - Ensure all intended elements are visible at the capture point.
+- Remove any overlapping elements; keep generous spacing between visuals.
+- Remove decorative or redundant elements that are not required by the user's prompt or storyboard.
 - Keep animation semantics as-is unless they're obviously broken.
 - Keep exactly one class AutoScene(Scene).
 - Preserve the CAPTURE POLICY and place `# CAPTURE_POINT` at the final steady layout with self.wait(0.75) and NO outro after that.
@@ -862,11 +970,54 @@ Return ONLY the revised Python code (no backticks).
         traceback.print_exc()
         return previous_code
 
+
+def _attempt_render_with_refine(
+    base_code: str,
+    *,
+    user_prompt: str,
+    settings: Optional[Dict[str, Any]],
+    quality: str,
+    run_prefix: str,
+    max_refines: int,
+) -> Tuple[Optional[str], Optional[bytes], Optional[Path], str]:
+    """
+    Try to render `base_code`, refining up to `max_refines` times using Gemini on failure.
+    Returns tuple: (final_code, video_bytes, png_path, last_error_log).
+    If rendering still fails, code/video/png are None and last_error_log carries the last trace.
+    """
+    attempts = 0
+    current_code = base_code
+    last_log = ""
+
+    while True:
+        try:
+            mp4_bytes, png_path = _run_manim(
+                current_code,
+                run_id=f"{run_prefix}_try{attempts}",
+                quality=quality,
+            )
+            return current_code, mp4_bytes, png_path, ""
+        except RenderError as err:
+            last_log = err.log or last_log
+        except Exception:
+            last_log = traceback.format_exc()
+
+        if attempts >= max_refines:
+            return None, None, None, last_log
+
+        attempts += 1
+        current_code = llm_refine_from_error(
+            previous_code=current_code,
+            error_message=last_log,
+            original_user_prompt=user_prompt,
+            settings=settings,
+        )
+
 def refine_loop(
     user_prompt: str,
     settings: Optional[Dict[str, Any]] = None,
     max_error_refines: int = 3,
-    do_visual_refine: bool =
+    do_visual_refine: bool = False,
 ) -> bytes:
     """
     Generate → render; on error, refine up to N times from Manim traceback → re-render.
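Note: _attempt_render_with_refine catches a RenderError whose .log carries the Manim CLI output. That class is defined elsewhere in app/main.py and is not shown in this diff; presumably it looks roughly like:

# Assumed shape of RenderError (not part of this diff).
class RenderError(Exception):
    def __init__(self, log: str = ""):
        super().__init__(log)
        self.log = log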
@@ -874,58 +1025,32 @@ def refine_loop(
     using the saved steady-state PNG, then re-render. Fallback to the best successful MP4.
     """
     # 1) initial generation (capture-aware)
+    initial_code = llm_generate_manim_code(user_prompt, settings=settings)
     quality = _quality_from_settings(settings)
 
-            if attempts >= max_error_refines:
-                raise
-        except Exception:
-            last_err = traceback.format_exc()
-            if attempts >= max_error_refines:
-                raise
-    except Exception:
-        print("Unexpected error path; refining from Python traceback...", file=sys.stderr)
-        attempts = 0
-        last_err = traceback.format_exc()
-        while attempts < max_error_refines:
-            attempts += 1
-            refined = llm_refine_from_error(
-                previous_code=code,
-                error_message=last_err,
-                original_user_prompt=user_prompt,
-                settings=settings,
-            )
-            try:
-                mp4_bytes, png_path = _run_manim(refined, run_id=f"iter_err_{attempts}", quality=quality)
-                code = refined
-                break
-            except Exception:
-                last_err = traceback.format_exc()
-                if attempts >= max_error_refines:
-                    raise
+    code, mp4_bytes, png_path, last_log = _attempt_render_with_refine(
+        initial_code,
+        user_prompt=user_prompt,
+        settings=settings,
+        quality=quality,
+        run_prefix="primary",
+        max_refines=max_error_refines,
+    )
+
+    if code is None:
+        print("Primary render failed after refinements; generating fallback code...", file=sys.stderr)
+        fallback_code = llm_generate_manim_code(user_prompt, settings=settings)
+        code, mp4_bytes, png_path, last_log = _attempt_render_with_refine(
+            fallback_code,
+            user_prompt=user_prompt,
+            settings=settings,
+            quality=quality,
+            run_prefix="fallback",
+            max_refines=2,
+        )
+        if code is None:
+            error_message = last_log or "Render failed after fallback attempts."
+            raise RenderError(error_message)
 
     # 3) optional visual refinement loop
     if do_visual_refine and png_path and png_path.exists():
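Note: with the defaults above, the primary pass makes up to 1 + 3 render attempts and the fallback up to 1 + 2, so a fully failing prompt costs at most 7 renders and 5 error refinements. A hypothetical direct invocation without the HTTP layer (prompt and filename are illustrative):

from pathlib import Path

# Hypothetical: render a one-off scene directly.
mp4_bytes = refine_loop(
    "Visualize bubble sort on six bars",
    settings={"resolution": "854x480"},
    max_error_refines=3,
    do_visual_refine=False,
)
Path("out.mp4").write_bytes(mp4_bytes)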
@@ -954,7 +1079,7 @@ def _auto_fix_render(
     max_attempts: int = 3,
 ) -> Tuple[Optional[str], Optional[bytes], str]:
     """Attempt to auto-fix user code via LLM refinement if available."""
-    if not
+    if not gemini_client:
         return None, None, initial_log
     quality = _quality_from_settings(settings)
     attempt_code = code
@@ -983,8 +1108,8 @@
 # ---------------- API ----------------
 @app.post("/storyboard/chat")
 def storyboard_chat(inp: StoryboardChatIn):
-    if not
-        raise HTTPException(500, "
+    if not gpt_client:
+        raise HTTPException(500, "Storyboard model is not configured")
     if not inp.message.strip() and not inp.plan:
         raise HTTPException(400, "Message or plan updates are required.")
@@ -1022,8 +1147,8 @@ def storyboard_chat(inp: StoryboardChatIn):
 
 @app.post("/storyboard/confirm")
 def storyboard_confirm(inp: StoryboardConfirmIn):
-    if not
-        raise HTTPException(500, "
+    if not gpt_client:
+        raise HTTPException(500, "Storyboard model is not configured")
 
     session = _get_or_create_session(inp.session_id, inp.settings or {})
     if inp.settings:
@@ -1122,7 +1247,7 @@ def generate_code(inp: GenerateCodeIn):
 @app.post("/generate-and-render")
 def generate_and_render(inp: PromptIn):
     try:
-        mp4 = refine_loop(inp.prompt, settings=inp.settings, max_error_refines=3, do_visual_refine=
+        mp4 = refine_loop(inp.prompt, settings=inp.settings, max_error_refines=3, do_visual_refine=False)
     except Exception:
         raise HTTPException(500, "Failed to produce video after refinement")
     return Response(
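Note: with do_visual_refine now hard-coded to False, /generate-and-render returns the first MP4 that survives error refinement. A hypothetical client call against the endpoint (URL, prompt, and timeout are illustrative):

import requests  # client-side example only, not part of the service

r = requests.post(
    "http://localhost:7860/generate-and-render",
    json={"prompt": "Animate the unit circle", "settings": {"resolution": "854x480"}},
    timeout=600,
)
r.raise_for_status()
with open("scene.mp4", "wb") as f:
    f.write(r.content)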