Verdiola committed on
Commit
07195ee
·
verified ·
1 Parent(s): 75181f9

another gpt-mini fix

Browse files
Files changed (1) hide show
  1. main.py +1450 -0
main.py ADDED
@@ -0,0 +1,1450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json, os, re, uuid, subprocess, sys, time, traceback, threading, base64
2
+ from io import BytesIO
3
+ from collections import deque
4
+ from pathlib import Path
5
+ from typing import Optional, Tuple, List, Dict, Any
6
+ from dataclasses import dataclass, field
7
+ from contextlib import contextmanager
8
+
9
+ from fastapi import FastAPI, HTTPException, Response
10
+ from fastapi.middleware.cors import CORSMiddleware
11
+ from pydantic import BaseModel, validator
12
+
13
+ from huggingface_hub import HfApi, create_repo, CommitOperationAdd
14
+
15
+ # Optional .env for local testing
16
+ from dotenv import load_dotenv
17
+ load_dotenv()
18
+
19
+ # -------- Gemini + GPT client setup --------
20
+ from google import genai
21
+ from google.genai import types
22
+
23
+ try:
24
+ from openai import OpenAI
25
+ except ImportError:
26
+ OpenAI = None
27
+
28
+ # We keep the GEMINI_* env vars for compatibility.
29
+ API_KEY = os.getenv("GEMINI_API_KEY", "")
30
+ MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-pro")
31
+ GEMINI_SMALL_MODEL = os.getenv("GEMINI_SMALL_MODEL")
32
+ DEFAULT_OPENAI_SMALL_MODEL = "gpt-4o-mini"
33
+ OPENAI_SMALL_MODEL = os.getenv("OPENAI_SMALL_MODEL") or DEFAULT_OPENAI_SMALL_MODEL
34
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
35
+ _OPENAI_ENV = os.getenv("USE_OPENAI")
36
+ if _OPENAI_ENV is None:
37
+ USE_OPENAI = bool(OPENAI_API_KEY)
38
+ else:
39
+ USE_OPENAI = _OPENAI_ENV.lower() == "true"
40
+ PORT = int(os.getenv("PORT", "7860"))
41
+
42
+ _OPENAI_RESPONSES_MODELS_ENV = os.getenv("OPENAI_RESPONSES_MODELS", "")
43
+ RESPONSES_API_MODEL_NAMES = {"gpt-5-mini"}
44
+ if _OPENAI_RESPONSES_MODELS_ENV:
45
+ RESPONSES_API_MODEL_NAMES.update(
46
+ model.strip().lower()
47
+ for model in _OPENAI_RESPONSES_MODELS_ENV.split(",")
48
+ if model.strip()
49
+ )
50
+
51
+ _OPENAI_RESPONSES_PREFIXES_ENV = os.getenv("OPENAI_RESPONSES_PREFIXES", "")
52
+ _RESPONSES_API_MODEL_PREFIXES = ["gpt-5"]
53
+ if _OPENAI_RESPONSES_PREFIXES_ENV:
54
+ _RESPONSES_API_MODEL_PREFIXES.extend(
55
+ prefix.strip().lower()
56
+ for prefix in _OPENAI_RESPONSES_PREFIXES_ENV.split(",")
57
+ if prefix.strip()
58
+ )
59
+ RESPONSES_API_MODEL_PREFIXES = tuple(_RESPONSES_API_MODEL_PREFIXES)
60
+ RESPONSES_API_ERROR_HINTS = (
61
+ "only supported in v1/responses",
62
+ "use the responses api",
63
+ "use the responses endpoint",
64
+ "please call the responses api",
65
+ "please use the responses endpoint",
66
+ )
67
+
68
+ gemini_client = genai.Client(api_key=API_KEY) if API_KEY else None
69
+ gpt_client = OpenAI(api_key=OPENAI_API_KEY) if (OPENAI_API_KEY and OpenAI and USE_OPENAI) else None
70
+
71
+ # -------- FastAPI app --------
72
+ app = FastAPI(title="Manim Render API (error + visual refine)")
73
+ app.add_middleware(
74
+ CORSMiddleware,
75
+ allow_origins=["*"], # tighten in prod
76
+ allow_methods=["*"],
77
+ allow_headers=["*"],
78
+ )
79
+
80
+
81
+ RUNS = Path("runs"); RUNS.mkdir(parents=True, exist_ok=True)
82
+
83
+ HF_DATASET_ID = os.getenv("HF_DATASET_ID", "MathFrames/email-log")
84
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
85
+
86
+ hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
87
+ if hf_api:
88
+ try:
89
+ create_repo(
90
+ HF_DATASET_ID,
91
+ repo_type="dataset",
92
+ private=True,
93
+ exist_ok=True,
94
+ token=HF_TOKEN,
95
+ )
96
+ except Exception:
97
+ # Ignore startup race/permission errors; individual writes will surface issues.
98
+ pass
99
+
100
+ # ---------------- simple 10 RPM rate limiter ----------------
101
class RateLimiter:
    """Thread-safe sliding-window limiter: at most ``max_per_minute`` acquires per 60 s."""

    def __init__(self, max_per_minute: int):
        self.max = max_per_minute
        self.lock = threading.Lock()
        self.events = deque()  # timestamps (time.time()) of granted acquires

    def acquire(self):
        """Block until a slot is free within the rolling 60 s window, then record it.

        Rewritten as a loop instead of recursion: under sustained contention the
        original recursed once per sleep, which could grow the call stack without
        bound, and the non-reentrant ``self.lock`` must be released before
        sleeping so other threads (and the re-check) can make progress.
        """
        while True:
            with self.lock:
                now = time.time()
                # drop events older than the 60 s window
                while self.events and now - self.events[0] >= 60:
                    self.events.popleft()
                if len(self.events) < self.max:
                    self.events.append(now)
                    return
                # wait until the oldest recorded event falls out of the window
                wait_for = 60 - (now - self.events[0])
            # sleep OUTSIDE the lock, then loop to re-check
            if wait_for > 0:
                time.sleep(wait_for + 0.01)
122
+
123
+ limiter = RateLimiter(10)
124
+ storyboard_limiter = RateLimiter(30)
125
+ RENDER_LOCK = threading.Lock()
126
+
127
+
128
@contextmanager
def acquire_render_slot(timeout: Optional[float] = None):
    """
    Global render queue: only one Manim render runs at a time.
    Blocks until the lock is available (optional timeout).

    Raises:
        RuntimeError: if a timeout was given and the lock was not obtained.
    """
    got_slot = (
        RENDER_LOCK.acquire()
        if timeout is None
        else RENDER_LOCK.acquire(timeout=timeout)
    )
    if not got_slot:
        raise RuntimeError("Render queue is busy; try again shortly.")
    try:
        yield
    finally:
        # Always free the slot, even if the render raised.
        RENDER_LOCK.release()
144
+
145
+ def _to_chat_content_item(item: Any) -> Any:
146
+ if isinstance(item, str):
147
+ return {"type": "text", "text": item}
148
+ if isinstance(item, dict):
149
+ return item
150
+ return {"type": "text", "text": str(item)}
151
+
152
+
153
+ def _to_response_content_item(item: Any) -> Dict[str, Any]:
154
+ if isinstance(item, str):
155
+ return {"type": "input_text", "text": item}
156
+ if isinstance(item, dict):
157
+ itype = item.get("type")
158
+ if itype == "text":
159
+ return {"type": "input_text", "text": item.get("text", "")}
160
+ if itype == "image_url":
161
+ image_url = item.get("image_url", {})
162
+ if isinstance(image_url, dict):
163
+ return {"type": "input_image", "image_url": image_url}
164
+ return {"type": "input_image", "image_url": {"url": str(image_url)}}
165
+ if itype in {"input_text", "input_image", "input_file"}:
166
+ return item
167
+ return {"type": "input_text", "text": str(item)}
168
+
169
+
170
def _build_openai_content(contents: Any, *, for_chat: bool) -> Any:
    """
    Normalize content payloads for chat (strings or multimodal lists) and responses API (typed blocks).
    """
    if isinstance(contents, (list, tuple)):
        converter = _to_chat_content_item if for_chat else _to_response_content_item
        return [converter(item) for item in contents]
    # Scalars: chat accepts them as-is; responses always wants a block list.
    if for_chat:
        return contents
    return [_to_response_content_item(contents)]
181
+
182
+
183
def _build_chat_messages(system: str, contents: Any) -> List[Dict[str, Any]]:
    """Assemble the two-message (system + user) Chat Completions payload."""
    user_content = _build_openai_content(contents, for_chat=True)
    return [
        {"role": "system", "content": system},
        {"role": "user", "content": user_content},
    ]
188
+
189
+
190
def _build_responses_input(system: str, contents: Any) -> List[Dict[str, Any]]:
    """Assemble the Responses-API input; both roles use typed content blocks."""
    return [
        {"role": role, "content": _build_openai_content(payload, for_chat=False)}
        for role, payload in (("system", system), ("user", contents))
    ]
195
+
196
def _requires_responses_api(model: str) -> bool:
    """Return True when *model* must be called through the Responses API."""
    lowered = (model or "").lower()
    if not lowered:
        return False
    if lowered in RESPONSES_API_MODEL_NAMES:
        return True
    # Prefix match (e.g. every "gpt-5*" variant).
    return any(
        lowered.startswith(prefix)
        for prefix in RESPONSES_API_MODEL_PREFIXES
        if prefix
    )
206
+
207
+
208
def _should_use_responses_fallback(err: Exception) -> bool:
    """True when the error message indicates the model is Responses-API-only."""
    lowered = str(err).lower()
    for hint in RESPONSES_API_ERROR_HINTS:
        if hint in lowered:
            return True
    return False
211
+
212
+
213
+ def _extract_chat_content(resp: Any) -> str:
214
+ content = resp.choices[0].message.content
215
+ if isinstance(content, str):
216
+ return content
217
+ if isinstance(content, list):
218
+ text_parts = []
219
+ for chunk in content:
220
+ if isinstance(chunk, dict) and chunk.get("type") == "text":
221
+ text_parts.append(chunk.get("text", ""))
222
+ else:
223
+ text_parts.append(str(chunk))
224
+ return "\n".join(filter(None, text_parts))
225
+ return str(content)
226
+
227
+
228
+ def _extract_responses_content(resp: Any) -> str:
229
+ text = getattr(resp, "output_text", None)
230
+ if text:
231
+ return text
232
+ output = getattr(resp, "output", None)
233
+ if output:
234
+ chunks = []
235
+ for item in output:
236
+ for elem in getattr(item, "content", []) or []:
237
+ chunk_text = getattr(elem, "text", None) or getattr(elem, "content", None)
238
+ if chunk_text:
239
+ chunks.append(chunk_text)
240
+ if chunks:
241
+ return "\n".join(map(str, chunks))
242
+ return str(resp)
243
+
244
+
245
def _invoke_gpt_model(model: str, system: str, contents: Any) -> str:
    """Call an OpenAI model, routing to the Responses API when required,
    or retrying there when the chat endpoint rejects the model."""
    if not gpt_client:
        raise RuntimeError("GPT client is not configured")

    def _via_responses() -> str:
        # Shared Responses-API path (primary or fallback).
        payload = _build_responses_input(system, contents)
        result = gpt_client.responses.create(model=model, input=payload)
        return _extract_responses_content(result)

    if _requires_responses_api(model):
        return _via_responses()
    try:
        resp = gpt_client.chat.completions.create(
            model=model,
            messages=_build_chat_messages(system, contents),
        )
    except Exception as err:
        # Some models are Responses-only; retry there if the error says so.
        if _should_use_responses_fallback(err):
            return _via_responses()
        raise
    return _extract_chat_content(resp)
264
+
265
+
266
def gemini_call(*, system: str, contents):
    """Wrapper to: enforce RPM and standardize text extraction."""
    if not gemini_client:
        raise RuntimeError("Gemini client is not configured")
    # Block until the shared 10-RPM limiter grants a slot.
    limiter.acquire()
    response = gemini_client.models.generate_content(
        model=MODEL,
        config=types.GenerateContentConfig(system_instruction=system),
        contents=contents,
    )
    return getattr(response, "text", str(response))
277
+
278
+
279
def gemini_small_call(*, system: str, contents: str) -> str:
    """Lightweight wrapper for the storyboard assistant using a smaller model with Gemini fallback."""
    storyboard_limiter.acquire()
    # Prefer the OpenAI small model whenever a GPT client is configured.
    if gpt_client:
        return _invoke_gpt_model(OPENAI_SMALL_MODEL, system, contents)
    if not gemini_client:
        raise RuntimeError("Gemini client is not configured")
    fallback_model = GEMINI_SMALL_MODEL or MODEL
    # Guard against GPT-style model names leaking into the Gemini path.
    if (
        not fallback_model
        or _requires_responses_api(fallback_model)
        or str(fallback_model).lower().startswith("gpt-")
    ):
        fallback_model = MODEL
    response = gemini_client.models.generate_content(
        model=fallback_model,
        config=types.GenerateContentConfig(system_instruction=system),
        contents=contents,
    )
    return getattr(response, "text", str(response))
300
+
301
+ # ---------------- prompts ----------------
302
+ SYSTEM_PROMPT = """You are a Manim CE (0.19.x) code generator/refiner.
303
+ Return ONLY valid Python code (no backticks, no prose).
304
+ Define exactly one class: AutoScene(Scene).
305
+ Keep it short (preferably ≤ ~60 s) and quickly renderable.
306
+
307
+ Use: from manim import *
308
+ Allowed imports: manim, math, numpy.
309
+ Forbidden: os, subprocess, sys, requests, pathlib, socket, shutil, psutil, any file/network/OS access.
310
+
311
+ # CAPTURE POLICY (must follow exactly)
312
+ - Insert a comment line `# CAPTURE_POINT` at the final, steady layout of the scene.
313
+ - Right after `# CAPTURE_POINT`, call self.wait(0.75) and then END THE SCENE.
314
+ - DO NOT add any outro animations, fades, or camera moves after `# CAPTURE_POINT`.
315
+ - Ensure all intended elements are visible and legible at `# CAPTURE_POINT` (adequate margins, no overlaps, font ≥ 32 px at 854x480).
316
+
317
+ # Common Manim CE 0.19 API constraints (must follow)
318
+ - Do NOT use `vertex=` with RightAngle(...). Choose the corner by line ordering or set quadrant=(±1, ±1).
319
+ - Do NOT call `.to_center()` (not a valid method). Use `.center()` or `.move_to(ORIGIN)`.
320
+ - Prefer `.move_to()`, `.align_to()`, `.to_edge()`, `.scale()`, `.next_to()` for layout/placement, keeping generous spacing (buff ≥ 0.6) so nothing overlaps.
321
+ - Only introduce objects that directly support the user's request. Avoid decorative or redundant elements that clutter the scene.
322
+ """
323
+
324
+ DEFAULT_SCENE = """from manim import *
325
+
326
+ class AutoScene(Scene):
327
+ def construct(self):
328
+ t = Text("Hello from Manim").scale(1)
329
+ self.play(Write(t))
330
+ # CAPTURE_POINT
331
+ self.wait(0.75)
332
+ """
333
+
334
+ STORYBOARD_SYSTEM_PROMPT = """You are MathFrames' storyboard director.
335
+ You interview educators, refine their ideas, and maintain a structured shot list for a short Manim video.
336
+
337
+ Always respond with a single JSON object matching this schema exactly:
338
+ {
339
+ "reply": "<short conversational answer for the user>",
340
+ "plan": {
341
+ "concept": "<core idea you are visualizing>",
342
+ "notes": "<optional reminders or staging notes>",
343
+ "scenes": [
344
+ {
345
+ "title": "Scene 1: Setup",
346
+ "objective": "<what this scene accomplishes>",
347
+ "steps": ["<bullet-level action>", "..."]
348
+ }
349
+ ]
350
+ },
351
+ "questions": ["<optional clarification question>", "..."]
352
+ }
353
+
354
+ Rules:
355
+ - Keep scene titles in the format: "Scene N: Subtitle".
356
+ - Each scene must list 1-5 clear, imperative steps or beats (use educational language, no code).
357
+ - Reflect any user-provided edits exactly.
358
+ - If the user supplies a plan JSON, treat it as the source of truth and improve it gently.
359
+ - Ask for clarification only when needed; otherwise leave the questions array empty.
360
+ - Never include Markdown fences, prose outside JSON, or code snippets.
361
+
362
+ # Professional editor guidance (use to drive the conversation naturally):
363
+ - Confirm the concept/topic and any subtopics that should appear.
364
+ - Capture the learning goal: what must the viewer understand by the end?
365
+ - Clarify how deep the explanation should go (introductory vs. detailed walk-through).
366
+ - Ask about any specific visuals, references, or prior scenes the user wants included.
367
+ - Check whether there's an existing script or outline to honor.
368
+ - Note any stylistic tone or audience expectations (e.g., middle school vs. college).
369
+ """
370
+
371
+ STORYBOARD_CONFIRM_SYSTEM_PROMPT = """You are MathFrames' storyboard director.
372
+ The user has finalized their plan. Craft the final handoff for the rendering model.
373
+
374
+ Return a JSON object:
375
+ {
376
+ "reply": "<brief confirmation for the user>",
377
+ "render_prompt": "<single paragraph prompt for the Manim code generator>",
378
+ "plan": { ... same structure as provided ... }
379
+ }
380
+
381
+ Guidelines:
382
+ - Keep render_prompt concise but fully descriptive. Mention each scene's purpose and key visuals.
383
+ - Respect the provided storyboard plan exactly—do not invent new scenes or steps.
384
+ - Include relevant settings (style, length, audience, resolution) when supplied.
385
+ - Do not add Markdown or code; respond with JSON only.
386
+ """
387
+
388
+ MAX_STORYBOARD_SCENES = 6
389
+
390
+
391
class ScenePayload(BaseModel):
    """One storyboard beat: a titled scene plus its bullet-level steps."""

    id: Optional[str] = None
    title: str
    objective: Optional[str] = ""
    steps: List[str]

    @validator("title", pre=True)
    def _clean_title(cls, value: Any) -> str:
        # Trim whitespace; fall back to a bare "Scene" label when empty.
        if isinstance(value, str):
            value = value.strip()
        return value if value else "Scene"

    @validator("steps", pre=True)
    def _coerce_steps(cls, value: Any) -> List[str]:
        # Accept a newline-joined string, a list of strings, or nested lists,
        # and flatten everything into one clean list of step strings.
        collected: List[str] = []
        if isinstance(value, str):
            collected.extend(value.replace("\r", "").split("\n"))
        elif isinstance(value, (list, tuple)):
            for item in value:
                if isinstance(item, str):
                    collected.extend(item.replace("\r", "").split("\n"))
                elif isinstance(item, (list, tuple)):
                    collected.extend(sub for sub in item if isinstance(sub, str))
        cleaned = [s for s in (str(step).strip(" •\t-") for step in collected) if s]
        return cleaned or ["Outline the key idea for this scene."]
425
+
426
+
427
class PlanPayload(BaseModel):
    """Full storyboard: a concept, optional notes, and an ordered scene list."""

    concept: str
    scenes: List[ScenePayload]
    notes: Optional[str] = ""

    @validator("concept", pre=True)
    def _clean_concept(cls, value: Any) -> str:
        # Trim whitespace; never allow an empty concept through.
        if isinstance(value, str):
            value = value.strip()
        return value or "Untitled Concept"

    @validator("scenes", pre=True)
    def _ensure_scenes(cls, value: Any) -> List[Any]:
        # Anything that is not a sequence becomes an empty scene list.
        return list(value) if isinstance(value, (list, tuple)) else []
443
+
444
+
445
class StoryboardChatIn(BaseModel):
    """Incoming chat turn for the storyboard assistant."""

    session_id: Optional[str] = None
    message: Optional[str] = ""
    plan: Optional[PlanPayload] = None
    settings: Optional[Dict[str, Any]] = None

    @validator("message", pre=True, always=True)
    def _default_message(cls, value: Any) -> str:
        # Null becomes the empty string; everything else is stringified.
        return "" if value is None else str(value)

    @validator("settings", pre=True, always=True)
    def _sanitize_settings(cls, value: Any) -> Dict[str, Any]:
        # Only accept a real dict; otherwise start with empty settings.
        return value if isinstance(value, dict) else {}
462
+
463
+
464
class StoryboardConfirmIn(BaseModel):
    """Final confirmation payload: the approved plan plus optional settings."""

    session_id: Optional[str] = None
    plan: PlanPayload
    settings: Optional[Dict[str, Any]] = None

    @validator("settings", pre=True, always=True)
    def _sanitize_settings(cls, value: Any) -> Dict[str, Any]:
        # Only accept a real dict; otherwise start with empty settings.
        return value if isinstance(value, dict) else {}
474
+
475
+
476
@dataclass
class PlanSession:
    """In-memory storyboard session state (held in PLAN_SESSIONS, not persisted)."""
    session_id: str
    # Chat history as {"role": ..., "content": ...} dicts.
    messages: List[Dict[str, Any]] = field(default_factory=list)
    # Current storyboard plan; None until the first turn sanitizes one in.
    plan: Optional[PlanPayload] = None
    # Per-session production settings (audience, style, length, resolution, ...).
    settings: Dict[str, Any] = field(default_factory=dict)
    created_at: float = field(default_factory=time.time)
    # Touched on every access/mutation; drives LRU pruning.
    updated_at: float = field(default_factory=time.time)
484
+
485
+
486
+ PLAN_SESSIONS: Dict[str, PlanSession] = {}
487
+ PLAN_LOCK = threading.Lock()
488
+
489
+ # ---------- NEW: carry full CLI error back to the refiner ----------
490
class RenderError(Exception):
    """Raised when a Manim render fails; carries the full CLI log for the refiner."""

    def __init__(self, log: str):
        super().__init__("Manim render failed")
        # Normalize None/empty to "" so callers can always treat it as text.
        self.log = log or ""
494
+
495
+ # ---------------- helpers ----------------
496
+ def _clean_code(text: str) -> str:
497
+ """Strip common Markdown fences like ```python ... ``` or ``` ..."""
498
+ if not text:
499
+ return ""
500
+ text = re.sub(r"^```(?:\s*python)?\s*", "", text.strip(), flags=re.IGNORECASE)
501
+ text = re.sub(r"\s*```$", "", text)
502
+ return text.strip()
503
+
504
+ def _preflight_sanitize(code: str) -> str:
505
+ """
506
+ Auto-correct a few frequent Manim CE 0.19 mistakes to reduce trivial crashes.
507
+ - .to_center() -> .center()
508
+ - Remove vertex=... from RightAngle(...), then normalize commas.
509
+ """
510
+ c = code
511
+ # 1) replace invalid method
512
+ c = re.sub(r"\.to_center\(\)", ".center()", c)
513
+
514
+ # 2) remove vertex=... kwarg inside RightAngle(...)
515
+ # Case A: middle of arg list with trailing comma
516
+ c = re.sub(
517
+ r"(RightAngle\s*\([^)]*?),\s*vertex\s*=\s*[^,)\s]+(\s*,)",
518
+ r"\1\2",
519
+ c,
520
+ flags=re.DOTALL,
521
+ )
522
+ # Case B: last kwarg before ')'
523
+ c = re.sub(
524
+ r"(RightAngle\s*\([^)]*?),\s*vertex\s*=\s*[^,)\s]+(\s*\))",
525
+ r"\1\2",
526
+ c,
527
+ flags=re.DOTALL,
528
+ )
529
+ # Normalize doubled commas or commas before ')'
530
+ c = re.sub(r",\s*,", ", ", c)
531
+ c = re.sub(r",\s*\)", ")", c)
532
+ return c
533
+
534
+
535
+ def _extract_json_dict(raw: str) -> Dict[str, Any]:
536
+ """Best-effort JSON extraction from the LLM response."""
537
+ if not raw:
538
+ raise ValueError("Empty response from model")
539
+ stripped = raw.strip()
540
+ if stripped.startswith("```"):
541
+ stripped = re.sub(r"^```(?:json)?", "", stripped, flags=re.IGNORECASE).strip()
542
+ stripped = re.sub(r"```$", "", stripped).strip()
543
+ try:
544
+ return json.loads(stripped)
545
+ except json.JSONDecodeError:
546
+ match = re.search(r"\{.*\}", stripped, flags=re.DOTALL)
547
+ if match:
548
+ candidate = match.group(0)
549
+ try:
550
+ return json.loads(candidate)
551
+ except json.JSONDecodeError:
552
+ pass
553
+ raise ValueError("Model did not return valid JSON")
554
+
555
+
556
+ def _generate_scene_id(index: int) -> str:
557
+ return f"scene-{index}-{uuid.uuid4().hex[:6]}"
558
+
559
+
560
+ def _normalize_scene_title(index: int, title: str) -> str:
561
+ title = title.strip()
562
+ if not title:
563
+ return f"Scene {index}: Beat"
564
+ prefix = f"Scene {index}"
565
+ if not title.lower().startswith("scene"):
566
+ return f"{prefix}: {title}"
567
+ parts = title.split(":", 1)
568
+ if len(parts) == 2:
569
+ return f"{prefix}: {parts[1].strip()}"
570
+ return f"{prefix}: {title.split(maxsplit=1)[-1]}"
571
+
572
+
573
def _sanitize_plan(plan: Optional[PlanPayload], *, concept_hint: str = "Untitled Concept") -> PlanPayload:
    """Return a plan guaranteed to have a concept and at least one usable scene."""
    if not plan:
        # No plan yet: seed a single setup scene from the concept hint.
        seed = ScenePayload(
            id=_generate_scene_id(1),
            title="Scene 1: Setup",
            objective=f"Introduce {concept_hint}",
            steps=[
                f"Display the title \"{concept_hint}\"",
                "Provide quick context for the viewer",
                "Highlight the main question to explore",
            ],
        )
        return PlanPayload(concept=concept_hint, notes="", scenes=[seed])

    concept = plan.concept.strip() or concept_hint or "Untitled Concept"
    scenes: List[ScenePayload] = []
    # Cap the scene count and normalize each kept scene.
    for idx, scene in enumerate(plan.scenes[:MAX_STORYBOARD_SCENES], start=1):
        steps = [str(step).strip() for step in scene.steps if step and str(step).strip()]
        if not steps:
            steps = [f"Explain the next idea for {concept}."]
        objective = (scene.objective or "").strip()
        scenes.append(
            ScenePayload(
                id=scene.id or _generate_scene_id(idx),
                title=_normalize_scene_title(idx, scene.title or f"Scene {idx}"),
                objective=objective or f"Advance the story about {concept}.",
                steps=steps,
            )
        )

    if not scenes:
        # Every scene was filtered out: fall back to a minimal setup scene.
        scenes.append(
            ScenePayload(
                id=_generate_scene_id(1),
                title="Scene 1: Setup",
                objective=f"Introduce {concept}",
                steps=[
                    f"Present the main idea \"{concept}\"",
                    "Explain why it matters to the viewer",
                ],
            )
        )

    return PlanPayload(concept=concept, notes=(plan.notes or "").strip(), scenes=scenes)
619
+
620
+
621
def _plan_to_public_dict(plan: PlanPayload) -> Dict[str, Any]:
    """Serialize the plan into a plain dict for JSON transport."""
    return plan.dict()
623
+
624
+
625
+ def _format_conversation(messages: List[Dict[str, Any]], limit: int = 8) -> str:
626
+ if not messages:
627
+ return "None yet."
628
+ recent = messages[-limit:]
629
+ lines = []
630
+ for msg in recent:
631
+ role = msg.get("role", "assistant").title()
632
+ content = str(msg.get("content", "")).strip()
633
+ lines.append(f"{role}: {content}")
634
+ return "\n".join(lines)
635
+
636
+
637
+ def _audience_label(value: Optional[str]) -> Optional[str]:
638
+ mapping = {
639
+ "ms": "middle school students",
640
+ "hs": "high school students",
641
+ "ug": "undergraduate students",
642
+ }
643
+ return mapping.get(str(value).lower()) if value else None
644
+
645
+
646
+ def _style_label(value: Optional[str]) -> Optional[str]:
647
+ mapping = {
648
+ "minimal": "minimal visuals (focus on narration and a few key elements)",
649
+ "steps": "step-by-step exposition with clear transitions",
650
+ "geometry": "geometry-focused visuals that highlight shapes and spatial relationships",
651
+ }
652
+ return mapping.get(str(value).lower()) if value else None
653
+
654
+
655
+ def _length_label(value: Optional[str]) -> Optional[str]:
656
+ mapping = {
657
+ "short": "short (~30–45s)",
658
+ "medium": "medium (~60–90s)",
659
+ }
660
+ return mapping.get(str(value).lower()) if value else None
661
+
662
+
663
+ def _quality_from_settings(settings: Optional[Dict[str, Any]]) -> str:
664
+ if not settings:
665
+ return "medium"
666
+ resolution = str(settings.get("resolution", "")).lower()
667
+ if resolution == "480p":
668
+ return "low"
669
+ if resolution == "1080p":
670
+ return "high"
671
+ return "medium"
672
+
673
+
674
+ def _quality_flag(quality: str) -> str:
675
+ return {
676
+ "low": "-ql",
677
+ "medium": "-qm",
678
+ "high": "-qh",
679
+ }.get(quality, "-qm")
680
+
681
+
682
def _compose_default_render_prompt(plan: PlanPayload, settings: Dict[str, Any], conversation: List[Dict[str, Any]]) -> str:
    """Fallback render prompt built directly from the plan when the model omits one."""
    lines = [
        f"Create a concise Manim CE 0.19 scene illustrating the concept \"{plan.concept}\".",
        "Structure the animation around these storyboard scenes:",
    ]
    for scene in plan.scenes:
        lines.append(f"- {scene.title} ({scene.objective})")
        lines.extend(f" • {step}" for step in scene.steps)
    if plan.notes:
        lines.append(f"Production notes: {plan.notes}")
    if settings:
        audience_text = _audience_label(settings.get("audience"))
        style_text = _style_label(settings.get("style"))
        length_text = _length_label(settings.get("length"))
        lines.append("Production settings to honor:")
        if audience_text:
            lines.append(f"- Tailor explanations for {audience_text} (language, pacing, assumptions).")
        if style_text:
            lines.append(f"- Presentation style: {style_text}.")
        if length_text:
            lines.append(f"- Keep total runtime {length_text}.")
        resolution = settings.get("resolution")
        if resolution:
            lines.append(f"- Render for {resolution} output (frame layout should read well at that resolution).")
    if conversation:
        lines.append("Incorporate the important constraints already discussed with the user.")
    lines.append("Follow the CAPTURE policy: include # CAPTURE_POINT just before the final self.wait(0.75).")
    return "\n".join(lines)
711
+
712
+
713
def _prune_plan_sessions(max_sessions: int = 200, max_age_seconds: int = 3600) -> None:
    """Evict the oldest sessions beyond *max_sessions* and any idle past *max_age_seconds*."""
    now = time.time()
    with PLAN_LOCK:
        overflow = len(PLAN_SESSIONS) - max_sessions
        if overflow > 0:
            # Drop the least-recently-updated sessions first.
            by_age = sorted(PLAN_SESSIONS.items(), key=lambda kv: kv[1].updated_at)
            for sid, _ in by_age[:overflow]:
                PLAN_SESSIONS.pop(sid, None)
        # Then drop anything idle longer than the age cutoff.
        for sid, session in list(PLAN_SESSIONS.items()):
            if now - session.updated_at > max_age_seconds:
                PLAN_SESSIONS.pop(sid, None)
723
+
724
+
725
def _get_or_create_session(session_id: Optional[str], settings: Optional[Dict[str, Any]] = None) -> PlanSession:
    """Fetch an existing storyboard session, or create and register a new one."""
    with PLAN_LOCK:
        existing = PLAN_SESSIONS.get(session_id) if session_id else None
        if existing is not None:
            if settings:
                existing.settings.update(settings)
            return existing
        session = PlanSession(session_id=session_id or uuid.uuid4().hex)
        if settings:
            session.settings.update(settings)
        PLAN_SESSIONS[session.session_id] = session
    # Prune outside the session lock: _prune_plan_sessions acquires
    # PLAN_LOCK itself, which is non-reentrant.
    _prune_plan_sessions()
    return session
739
+
740
+
741
def _storyboard_model_reply(session: PlanSession, user_message: str) -> Tuple[str, PlanPayload, List[str]]:
    """Run one storyboard-refinement turn.

    Prompts the small model with the current plan, settings, and recent history,
    parses its JSON reply, and folds any returned plan back into the session.
    Returns (reply_text, updated_plan, clarifying_questions). Parse failures are
    logged and tolerated: the sanitized existing plan is kept.
    """
    concept_hint = session.plan.concept if session.plan else (user_message.strip() or "Untitled Concept")
    session.plan = _sanitize_plan(session.plan, concept_hint=concept_hint)
    session.updated_at = time.time()
    plan_json = json.dumps(_plan_to_public_dict(session.plan), indent=2)
    settings_json = json.dumps(session.settings or {}, indent=2)
    history_text = _format_conversation(session.messages)
    latest_message = user_message.strip() or "User adjusted the storyboard without additional text."
    contents = f"""You are refining a math animation storyboard with the user.
Current storyboard plan JSON:
{plan_json}

Session settings:
{settings_json}

Conversation so far:
{history_text}

Update the plan if needed and craft your reply (JSON only). Latest user message:
{latest_message}
"""
    raw_response = gemini_small_call(system=STORYBOARD_SYSTEM_PROMPT, contents=contents)
    try:
        parsed = _extract_json_dict(raw_response)
    except Exception as exc:
        # Non-fatal: fall through with an empty parse and keep the current plan.
        print("Storyboard model JSON parse failed:", exc, file=sys.stderr)
        parsed = {}

    reply_text = str(parsed.get("reply") or "").strip() or "Understood—updating the storyboard."
    plan_data = parsed.get("plan")
    new_plan = session.plan
    if isinstance(plan_data, dict):
        try:
            new_plan = PlanPayload(**plan_data)
        except Exception as exc:
            # Keep the previous plan when the model's plan does not validate.
            print("Unable to parse plan from storyboard model:", exc, file=sys.stderr)
    session.plan = _sanitize_plan(new_plan, concept_hint=session.plan.concept if session.plan else concept_hint)
    questions_field = parsed.get("questions") or []
    # Accept string/int questions only, stripped and non-empty.
    questions = [str(q).strip() for q in questions_field if isinstance(q, (str, int)) and str(q).strip()]
    session.updated_at = time.time()
    return reply_text, session.plan, questions
782
+
783
+
784
def _storyboard_model_confirm(session: PlanSession) -> Tuple[str, PlanPayload, str]:
    """Finalize an approved storyboard: produce a confirmation reply and render prompt.

    Returns:
        Tuple of (assistant reply text, final sanitized ``PlanPayload``,
        render prompt for the code-generation stage). A templated render
        prompt is composed locally when the model omits one.
    """
    session.plan = _sanitize_plan(session.plan, concept_hint=session.plan.concept if session.plan else "Untitled Concept")
    plan_json = json.dumps(_plan_to_public_dict(session.plan), indent=2)
    settings_json = json.dumps(session.settings or {}, indent=2)
    history_text = _format_conversation(session.messages)
    contents = f"""The user has approved this storyboard plan:
{plan_json}

Session settings:
{settings_json}

Conversation summary:
{history_text}

Produce the confirmation JSON only (no Markdown)."""
    raw_response = gemini_small_call(system=STORYBOARD_CONFIRM_SYSTEM_PROMPT, contents=contents)
    # Malformed JSON degrades gracefully to the canned reply/plan fallbacks below.
    try:
        parsed = _extract_json_dict(raw_response)
    except Exception as exc:
        print("Storyboard confirm JSON parse failed:", exc, file=sys.stderr)
        parsed = {}

    reply_text = str(parsed.get("reply") or "").strip() or "Great! Locking the storyboard and preparing the renderer."
    plan_data = parsed.get("plan")
    final_plan = session.plan
    if isinstance(plan_data, dict):
        # Keep the session's current plan when the model's version fails validation.
        try:
            final_plan = PlanPayload(**plan_data)
        except Exception as exc:
            print("Unable to parse confirmed plan:", exc, file=sys.stderr)
    final_plan = _sanitize_plan(final_plan, concept_hint=final_plan.concept if final_plan else session.plan.concept)
    render_prompt = str(parsed.get("render_prompt") or "").strip()
    if not render_prompt:
        render_prompt = _compose_default_render_prompt(final_plan, session.settings, session.messages)
    session.plan = final_plan
    session.updated_at = time.time()
    return reply_text, final_plan, render_prompt
821
+
822
def _run_manim(scene_code: str, run_id: Optional[str] = None, quality: str = "medium") -> Tuple[bytes, Optional[Path]]:
    """Render the scene to MP4 and also save a steady-state PNG (last frame).

    Args:
        scene_code: Python source expected to define ``class AutoScene(Scene)``.
        run_id: Identifier for the per-run working directory; random if omitted.
        quality: Quality preset name, translated to a manim CLI flag.

    Returns:
        Tuple of (MP4 bytes, path to the last-frame PNG or ``None`` if the
        snapshot pass failed — the PNG only feeds optional visual refinement).

    Raises:
        RenderError: If the video render fails or no MP4 file is produced.
    """
    run_id = run_id or str(uuid.uuid4())[:8]
    work = RUNS / run_id
    work.mkdir(parents=True, exist_ok=True)
    media = work / "media"
    media.mkdir(parents=True, exist_ok=True)
    scene_path = work / "scene.py"

    # Write scene code (after sanitizer)
    safe_code = _preflight_sanitize(scene_code)
    scene_path.write_text(safe_code, encoding="utf-8")

    env = os.environ.copy()
    env["PYTHONPATH"] = str(work)

    quality_flag = _quality_flag(quality)

    # 1) Render video
    cmd_video = [
        "manim", quality_flag, "--disable_caching",
        "--media_dir", str(media),
        "-o", f"{run_id}.mp4",
        str(scene_path), "AutoScene",
    ]
    proc_v = subprocess.run(
        cmd_video,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr so the full log can be fed to the refiner
        text=True,
        env=env,
    )
    if proc_v.returncode != 0:
        log = proc_v.stdout or ""
        print("Manim stdout/stderr:\n", log, file=sys.stderr)
        raise RenderError(log)

    # Locate output mp4: prefer the expected name, otherwise any mp4 manim produced.
    mp4 = next(media.rglob(f"{run_id}.mp4"), None)
    if mp4 is None:
        mp4 = next(media.rglob("*.mp4"), None)
    if mp4 is None:
        raise RenderError("Rendered video not found")

    # 2) Save last frame PNG (leverages our CAPTURE_POINT rule).
    png_path = None
    cmd_png = [
        "manim", quality_flag, "--disable_caching", "-s",  # -s saves the last frame as an image
        "--media_dir", str(media),
        str(scene_path), "AutoScene",
    ]
    proc_p = subprocess.run(
        cmd_png,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        env=env,
    )
    if proc_p.returncode == 0:
        # Pick the most recently written PNG so we get this run's frame rather
        # than whichever file rglob's traversal order happens to yield last.
        pngs = list(media.rglob("*.png"))
        if pngs:
            png_path = max(pngs, key=lambda p: p.stat().st_mtime)

    return mp4.read_bytes(), png_path
888
+
889
def _upload_image_to_gemini(png_path: Path):
    """Upload a PNG to the Gemini Files API and return the file reference.

    Returns ``None`` when the Gemini client is not configured or the file is
    missing, so callers can skip the vision step gracefully.
    """
    if not gemini_client or not png_path or not png_path.exists():
        return None
    limiter.acquire()  # respect the shared rate limit before hitting the API
    with open(png_path, "rb") as f:
        file_ref = gemini_client.files.upload(
            file=f,
            config={"mime_type": "image/png"},
        )
    return file_ref
900
+
901
+
902
def llm_generate_manim_code(
    prompt: str,
    settings: Optional[Dict[str, Any]] = None,
    previous_code: Optional[str] = None,
) -> str:
    """First-pass generation (capture-aware).

    Builds a generation prompt from the user's request plus any production
    settings and asks the main Gemini model for a complete AutoScene. Falls
    back to ``previous_code`` (or ``DEFAULT_SCENE``) when the client is
    unavailable, the response lacks an AutoScene class, or the call raises.
    """
    if not gemini_client:
        return DEFAULT_SCENE
    try:
        contents = f"Create AutoScene for: {prompt}\nRemember the CAPTURE POLICY and Common API constraints."
        if settings:
            # Translate raw settings values into human-readable constraints.
            audience_text = _audience_label(settings.get("audience"))
            style_text = _style_label(settings.get("style"))
            length_text = _length_label(settings.get("length"))
            contents += "\nProduction settings to respect:"
            if audience_text:
                contents += f"\n- Tailor explanations for {audience_text}."
            if style_text:
                contents += f"\n- Style: {style_text}."
            if length_text:
                contents += f"\n- Target runtime: {length_text}."
            resolution = settings.get("resolution")
            if resolution:
                contents += f"\n- Design visuals that read clearly at {resolution}."
        contents += "\nLayout requirement: ensure every element has clear separation—absolutely no overlaps at the capture point."
        contents += "\nKeep the composition minimal: only include elements explicitly needed for the prompt."
        response_text = gemini_call(system=SYSTEM_PROMPT, contents=contents)
        code = _clean_code(response_text)
        # Guard against responses that are not a usable scene definition.
        if "class AutoScene" not in code:
            code = previous_code or DEFAULT_SCENE
        return code
    except Exception:
        print("LLM generate error:", file=sys.stderr)
        traceback.print_exc()
        return previous_code or DEFAULT_SCENE
937
+
938
def llm_refine_from_error(
    previous_code: str,
    error_message: str,
    original_user_prompt: str,
    settings: Optional[Dict[str, Any]] = None,
) -> str:
    """When Manim fails; send the *real* CLI log/trace to the LLM.

    Asks the model for a minimal fix of ``previous_code`` given the tail of
    the Manim error log. Returns ``previous_code`` (or ``DEFAULT_SCENE``)
    unchanged when the client is unavailable, the response lacks an AutoScene
    class, or the call raises.
    """
    if not gemini_client:
        return previous_code or DEFAULT_SCENE
    try:
        # Only the tail of the log is useful; keep the prompt size bounded.
        trimmed = error_message[-4000:] if error_message else ""
        user_prompt = f"""Original user prompt:
{original_user_prompt}

The following Manim CE (0.19.x) code failed to render. Fix it.

Current code:
{previous_code}

Error / stack trace (tail):
{trimmed}

Requirements:
- Fix the bug while preserving the math logic and planned animations.
- Keep exactly one class AutoScene(Scene).
- Keep the CAPTURE POLICY and ensure # CAPTURE_POINT is at the final steady layout.
- Eliminate any overlapping elements; maintain clear spacing at the capture point.
- Remove any objects that are not necessary for the prompt or storyboard; keep the scene concise.
- Scan for nonexistent methods (e.g., `.to_center`) or invalid kwargs (e.g., `vertex=` on RightAngle) and replace with valid Manim CE 0.19 API.
- Prefer `.center()`/`.move_to(ORIGIN)`, and `.move_to()`, `.align_to()`, `.to_edge()`, `.next_to()` for layout.
- Apply the smallest change necessary to resolve the failure; do not overhaul structure, pacing, or stylistic choices the user made.
- Preserve all existing text content (titles, labels, strings) unless it directly causes the error.
- Do not alter functional math/logic that already works; only touch the problematic lines needed for a successful render.
- Return ONLY the corrected Python code (no backticks).
"""
        if settings:
            # Remind the model which production targets the fix must not break.
            audience_text = _audience_label(settings.get("audience"))
            style_text = _style_label(settings.get("style"))
            length_text = _length_label(settings.get("length"))
            extra = "\nProduction targets to preserve:"
            if audience_text:
                extra += f"\n- Audience: {audience_text}."
            if style_text:
                extra += f"\n- Style: {style_text}."
            if length_text:
                extra += f"\n- Runtime goal: {length_text}."
            resolution = settings.get("resolution")
            if resolution:
                extra += f"\n- Ensure layout reads clearly at {resolution}."
            user_prompt += extra
        response_text = gemini_call(system=SYSTEM_PROMPT, contents=user_prompt)
        code = _clean_code(response_text)
        # Guard against responses that are not a usable scene definition.
        if "class AutoScene" not in code:
            return previous_code or DEFAULT_SCENE
        return code
    except Exception:
        print("LLM refine error:", file=sys.stderr)
        traceback.print_exc()
        return previous_code or DEFAULT_SCENE
997
+
998
def llm_visual_refine_from_image(
    original_user_prompt: str,
    previous_code: str,
    png_path: Optional[Path],
    settings: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Use the screenshot to request layout/legibility/placement fixes.
    Includes the original prompt and current code, and asks for minimal edits.

    Returns ``previous_code`` unchanged when the client or screenshot is
    unavailable, the upload fails, the response lacks an AutoScene class, or
    the call raises.
    """
    if not gemini_client or not png_path or not png_path.exists():
        return previous_code
    try:
        # Upload the steady-state frame so the model can see the actual layout.
        file_ref = _upload_image_to_gemini(png_path)
        if not file_ref:
            return previous_code

        visual_prompt = f"""You are refining a Manim CE (0.19.x) scene based on its steady-state screenshot.
Original user prompt:
{original_user_prompt}

Current Manim code:
{previous_code}

Tasks (optimize for readability and visual quality without changing the math meaning):
- Fix layout issues (overlaps, cramped margins, alignment, consistent scaling).
- Improve text legibility (minimum size ~32 px at 854x480, adequate contrast).
- Ensure all intended elements are visible at the capture point.
- Remove any overlapping elements; keep generous spacing between visuals.
- Remove decorative or redundant elements that are not required by the user's prompt or storyboard.
- Keep animation semantics as-is unless they're obviously broken.
- Keep exactly one class AutoScene(Scene).
- Preserve the CAPTURE POLICY and place `# CAPTURE_POINT` at the final steady layout with self.wait(0.75) and NO outro after that.
- Make the minimal adjustments needed to fix readability; do not rework the overall composition or pacing beyond what the user already authored.
- Preserve all text labels, titles, and strings as written unless they directly cause overlap/legibility issues.
- Avoid rewriting functioning math/logic—only adjust positioning, styling, or other elements required to fix the visual defect.
Return ONLY the revised Python code (no backticks).
"""
        if settings:
            audience_text = _audience_label(settings.get("audience"))
            style_text = _style_label(settings.get("style"))
            length_text = _length_label(settings.get("length"))
            visual_prompt += "\nKeep these production settings in mind:"
            if audience_text:
                visual_prompt += f"\n- Audience: {audience_text}."
            if style_text:
                visual_prompt += f"\n- Style: {style_text}."
            if length_text:
                visual_prompt += f"\n- Runtime target: {length_text}."
            resolution = settings.get("resolution")
            if resolution:
                visual_prompt += f"\n- Layout should stay readable at {resolution}."

        # Multimodal call: image reference plus the textual instructions.
        response_text = gemini_call(system=SYSTEM_PROMPT, contents=[file_ref, visual_prompt])
        code = _clean_code(response_text)
        # Guard against responses that are not a usable scene definition.
        if "class AutoScene" not in code:
            return previous_code
        return code
    except Exception:
        print("LLM visual refine error:", file=sys.stderr)
        traceback.print_exc()
        return previous_code
1060
+
1061
+
1062
def _attempt_render_with_refine(
    base_code: str,
    *,
    user_prompt: str,
    settings: Optional[Dict[str, Any]],
    quality: str,
    run_prefix: str,
    max_refines: int,
) -> Tuple[Optional[str], Optional[bytes], Optional[Path], str]:
    """Render ``base_code``, repairing it via the LLM on failure.

    Performs up to ``max_refines + 1`` render attempts; after each failure
    except the last, the code is passed through ``llm_refine_from_error``.

    Returns:
        Tuple of (final_code, video_bytes, png_path, last_error_log). On
        total failure the first three entries are ``None`` and the log holds
        the last traceback/CLI output.
    """
    current_code = base_code
    last_log = ""

    for attempt in range(max_refines + 1):
        try:
            video, snapshot = _run_manim(
                current_code,
                run_id=f"{run_prefix}_try{attempt}",
                quality=quality,
            )
        except RenderError as err:
            last_log = err.log or last_log
        except Exception:
            last_log = traceback.format_exc()
        else:
            return current_code, video, snapshot, ""

        # Only refine if another render attempt is still allowed.
        if attempt < max_refines:
            current_code = llm_refine_from_error(
                previous_code=current_code,
                error_message=last_log,
                original_user_prompt=user_prompt,
                settings=settings,
            )

    return None, None, None, last_log
1103
+
1104
def refine_loop(
    user_prompt: str,
    settings: Optional[Dict[str, Any]] = None,
    max_error_refines: int = 3,
    do_visual_refine: bool = False,
) -> bytes:
    """Generate Manim code, render it, and iteratively repair failures.

    Pipeline: generate -> render, refining from Manim tracebacks up to
    ``max_error_refines`` times. If that fails entirely, regenerate once and
    retry with a smaller refine budget. On success, optionally run one
    screenshot-driven visual polish pass, keeping the last good MP4 when the
    polished code does not render.

    Raises:
        RenderError: When both the primary and fallback passes fail.
    """
    quality = _quality_from_settings(settings)

    # Primary pass: fresh generation plus error-driven refinement.
    code, mp4_bytes, png_path, last_log = _attempt_render_with_refine(
        llm_generate_manim_code(user_prompt, settings=settings),
        user_prompt=user_prompt,
        settings=settings,
        quality=quality,
        run_prefix="primary",
        max_refines=max_error_refines,
    )

    # Fallback pass: regenerate from scratch with a reduced repair budget.
    if code is None:
        print("Primary render failed after refinements; generating fallback code...", file=sys.stderr)
        code, mp4_bytes, png_path, last_log = _attempt_render_with_refine(
            llm_generate_manim_code(user_prompt, settings=settings),
            user_prompt=user_prompt,
            settings=settings,
            quality=quality,
            run_prefix="fallback",
            max_refines=2,
        )
        if code is None:
            raise RenderError(last_log or "Render failed after fallback attempts.")

    # Optional screenshot-based polish; fall back to the known-good video.
    if do_visual_refine and png_path and png_path.exists():
        polished = llm_visual_refine_from_image(
            original_user_prompt=user_prompt,
            previous_code=code,
            png_path=png_path,
            settings=settings,
        )
        if polished.strip() != code.strip():
            try:
                polished_bytes, _ = _run_manim(polished, run_id="iter2", quality=quality)
            except Exception:
                print("Visual refine render failed; returning best known render.", file=sys.stderr)
            else:
                return polished_bytes

    return mp4_bytes
1160
+
1161
+
1162
def _auto_fix_render(
    user_prompt: str,
    code: str,
    settings: Optional[Dict[str, Any]],
    initial_log: str,
    max_attempts: int = 3,
) -> Tuple[Optional[str], Optional[bytes], str]:
    """Try to repair failing user code with the LLM and re-render it.

    Returns:
        (fixed_code, mp4_bytes, "") on the first successful render, or
        (None, None, last_log) when the model is unavailable, stops proposing
        changes, or every attempt still fails.
    """
    if not gemini_client:
        return None, None, initial_log

    quality = _quality_from_settings(settings)
    current = code
    last_log = initial_log

    for attempt in range(max_attempts):
        candidate = llm_refine_from_error(
            previous_code=current,
            error_message=last_log,
            original_user_prompt=user_prompt,
            settings=settings,
        )
        # No new proposal from the model means further rounds would be identical.
        if candidate.strip() == current.strip():
            break
        current = candidate
        try:
            video, _ = _run_manim(
                current,
                run_id=f"manual_fix_{attempt}",
                quality=quality,
            )
        except RenderError as err:
            last_log = err.log or last_log
        else:
            return current, video, ""

    return None, None, last_log
1195
+
1196
# ---------------- API ----------------
@app.post("/storyboard/chat")
def storyboard_chat(inp: StoryboardChatIn):
    """Advance a storyboard planning conversation by one turn.

    Accepts an optional session id (a new session is created otherwise),
    optional settings overrides, an optional user-edited plan, and the user's
    message. Returns the assistant reply, updated plan, follow-up questions,
    and the effective settings.
    """
    if not (gpt_client or gemini_client):
        raise HTTPException(500, "Storyboard model is not configured")
    if not inp.message.strip() and not inp.plan:
        raise HTTPException(400, "Message or plan updates are required.")

    session = _get_or_create_session(inp.session_id, inp.settings or {})
    if inp.settings:
        session.settings.update(inp.settings)

    # A user-supplied plan overrides the stored one; a malformed plan is
    # logged and ignored rather than failing the whole turn.
    if inp.plan:
        try:
            session.plan = _sanitize_plan(inp.plan, concept_hint=inp.plan.concept)
        except Exception as exc:
            print("Failed to apply user-supplied plan:", exc, file=sys.stderr)

    user_message = inp.message.strip()
    if user_message:
        session.messages.append({"role": "user", "content": user_message})
    else:
        session.messages.append({"role": "user", "content": "[Plan updated without additional message]"})

    try:
        reply_text, plan_model, questions = _storyboard_model_reply(session, user_message)
    except Exception as exc:
        print("Storyboard chat error:", exc, file=sys.stderr)
        # Chain the original exception so server logs keep the root cause.
        raise HTTPException(500, "Storyboard assistant failed to respond") from exc

    session.messages.append({"role": "assistant", "content": reply_text})
    return {
        "session_id": session.session_id,
        "reply": reply_text,
        "plan": plan_model.dict(),
        "questions": questions,
        "settings": session.settings,
    }
1234
+
1235
+
1236
@app.post("/storyboard/confirm")
def storyboard_confirm(inp: StoryboardConfirmIn):
    """Lock in an approved storyboard plan and produce its render prompt."""
    if not (gpt_client or gemini_client):
        raise HTTPException(500, "Storyboard model is not configured")

    session = _get_or_create_session(inp.session_id, inp.settings or {})
    if inp.settings:
        session.settings.update(inp.settings)

    session.plan = _sanitize_plan(inp.plan, concept_hint=inp.plan.concept)
    session.messages.append({"role": "user", "content": "[User confirmed the storyboard plan]"})

    try:
        reply, plan_out, prompt_text = _storyboard_model_confirm(session)
    except Exception as exc:
        # Degrade gracefully: keep the sanitized plan and compose a templated prompt.
        print("Storyboard confirm error:", exc, file=sys.stderr)
        plan_out = session.plan
        prompt_text = _compose_default_render_prompt(plan_out, session.settings, session.messages)
        reply = "Plan confirmed. Falling back to a templated prompt."

    session.messages.append({"role": "assistant", "content": reply})
    return {
        "session_id": session.session_id,
        "reply": reply,
        "render_prompt": prompt_text,
        "plan": plan_out.dict(),
        "settings": session.settings,
    }
1264
+
1265
+
1266
class PromptIn(BaseModel):
    """Request body carrying a user prompt plus optional production settings."""

    prompt: str
    settings: Optional[Dict[str, Any]] = None

    @validator("prompt")
    def _validate_prompt(cls, value: str) -> str:
        # Reject missing/whitespace-only prompts; return the trimmed text.
        stripped = (value or "").strip()
        if not stripped:
            raise ValueError("Prompt cannot be empty")
        return stripped

    @validator("settings", pre=True, always=True)
    def _sanitize_settings(cls, value: Any) -> Optional[Dict[str, Any]]:
        # Anything that is not a dict (including None) is normalized to None.
        return value if isinstance(value, dict) else None
1281
+
1282
+
1283
class GenerateCodeIn(PromptIn):
    # Same shape as PromptIn; a separate class keeps the /generate-code schema distinct.
    pass
1285
+
1286
+
1287
class RenderCodeIn(BaseModel):
    """Request body for /render-code: user-edited scene code plus context."""

    code: str
    prompt: Optional[str] = ""
    settings: Optional[Dict[str, Any]] = None
    auto_fix: bool = False

    @validator("code")
    def _validate_code(cls, value: str) -> str:
        # Whitespace-only code is rejected; otherwise the text is returned
        # untouched so user indentation survives.
        if not (value and value.strip()):
            raise ValueError("Code cannot be empty")
        return value

    @validator("prompt", pre=True, always=True)
    def _sanitize_prompt(cls, value: Any) -> str:
        # Coerce None/non-strings to a trimmed string.
        return str(value or "").strip()

    @validator("settings", pre=True, always=True)
    def _sanitize_settings(cls, value: Any) -> Optional[Dict[str, Any]]:
        # Anything that is not a dict (including None) is normalized to None.
        return value if isinstance(value, dict) else None
1308
+
1309
class EmailIn(BaseModel):
    """Request body for /store-email; the address is normalized on validation."""

    email: str

    @property
    def sanitized(self) -> str:
        # Validation has already stripped and lower-cased the address.
        return self.email

    @validator("email")
    def validate_email(cls, value: str) -> str:
        normalized = value.strip().lower()
        if not normalized:
            raise ValueError("Email cannot be empty")
        if re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", normalized) is None:
            raise ValueError("Email is not valid")
        return normalized
1324
+
1325
@app.get("/")
def health():
    """Liveness probe reporting the model name and configured LLM backends."""
    status = {"ok": True, "model": MODEL}
    status["has_gemini"] = bool(gemini_client)
    status["has_gpt"] = bool(gpt_client)
    return status
1333
+
1334
@app.post("/generate-code")
def generate_code(inp: GenerateCodeIn):
    """Return ONLY the generated Manim Python code (no rendering)."""
    return {"code": llm_generate_manim_code(inp.prompt, settings=inp.settings)}
1339
+
1340
@app.post("/generate-and-render")
def generate_and_render(inp: PromptIn):
    """Run the full generate -> render -> refine pipeline and return the MP4."""
    try:
        with acquire_render_slot():
            video = refine_loop(inp.prompt, settings=inp.settings, max_error_refines=3, do_visual_refine=False)
    except RuntimeError:
        # RuntimeError here indicates the single render slot is taken
        # (presumably raised by acquire_render_slot — matches the 503 message).
        raise HTTPException(
            status_code=503,
            detail={
                "error": "queue_busy",
                "message": "Another render is already running. Please wait a moment and try again.",
            },
        )
    except Exception:
        raise HTTPException(500, "Failed to produce video after refinement")
    headers = {"Content-Disposition": 'inline; filename="result.mp4"'}
    return Response(content=video, media_type="video/mp4", headers=headers)
1360
+
1361
+
1362
@app.post("/render-code")
def render_code(inp: RenderCodeIn):
    """Render user-supplied scene code; on failure, attempt an automatic LLM fix.

    Success returns raw MP4 bytes. If the code fails but the auto-fix
    succeeds, a JSON payload with the fixed code and base64-encoded video is
    returned instead. An unfixable render failure yields HTTP 400 with the
    Manim log; a busy render slot yields HTTP 503.
    """
    quality = _quality_from_settings(inp.settings)
    try:
        with acquire_render_slot():
            try:
                mp4_bytes, _ = _run_manim(inp.code, run_id="manual", quality=quality)
                return Response(
                    content=mp4_bytes,
                    media_type="video/mp4",
                    headers={"Content-Disposition": 'inline; filename="result.mp4"'}
                )
            except RenderError as exc:
                log = exc.log or ""
                fixed_code, fixed_video, final_log = _auto_fix_render(
                    user_prompt=inp.prompt or "User-edited Manim code",
                    code=inp.code,
                    settings=inp.settings,
                    initial_log=log,
                )
                if fixed_code and fixed_video:
                    payload = {
                        "auto_fixed": True,
                        "message": "Your code triggered a Manim error, so I applied the smallest possible fix (keeping your edits) and reran the render.",
                        "code": fixed_code,
                        "video_base64": base64.b64encode(fixed_video).decode("utf-8"),
                        "video_mime_type": "video/mp4",
                        "files": [
                            {"filename": "scene.py", "contents": fixed_code}
                        ],
                        "meta": {"resolution": inp.settings.get("resolution") if inp.settings else None},
                        "log_tail": (log or "")[-600:]
                    }
                    return Response(
                        content=json.dumps(payload),
                        media_type="application/json",
                    )
                detail_log = (final_log or log)[-6000:]
                raise HTTPException(
                    status_code=400,
                    detail={"error": "Render failed", "log": detail_log, "code": inp.code},
                ) from exc
    except HTTPException:
        # Bug fix: without this re-raise, the 400 raised above was swallowed
        # by the generic handler below and surfaced as an opaque 500.
        raise
    except RuntimeError:
        raise HTTPException(
            status_code=503,
            detail={
                "error": "queue_busy",
                "message": "Another render is already running. Please wait a moment and try again.",
            },
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail={"error": "Unexpected render failure", "log": str(exc)}) from exc
1422
+
1423
@app.post("/store-email")
def store_email(email: EmailIn):
    """Store the provided email address in the configured Hugging Face dataset.

    Writes a small JSON record to a unique path in the dataset repo. Returns
    ``{"stored": True, "path": ...}`` on success; raises HTTP 500 when logging
    is not configured or the commit fails.
    """
    if not hf_api or not HF_TOKEN:
        raise HTTPException(500, "Email logging is not configured")

    sanitized_email = email.sanitized
    # Single time source so the stored timestamp and the file key agree
    # (previously two separate time.time() calls could straddle a second).
    now = time.time()
    timestamp = int(now)
    key = f"emails/{int(now * 1000)}-{uuid.uuid4().hex}.json"
    payload = {"email": sanitized_email, "ts": timestamp}

    try:
        hf_api.create_commit(
            repo_id=HF_DATASET_ID,
            repo_type="dataset",
            operations=[
                CommitOperationAdd(
                    path_in_repo=key,
                    path_or_fileobj=BytesIO(json.dumps(payload).encode("utf-8")),
                )
            ],
            commit_message=f"Log email: {sanitized_email}",
            token=HF_TOKEN,
        )
    except Exception as exc:
        print("Failed to log email to Hugging Face:", exc, file=sys.stderr)
        # Chain the original exception so server logs keep the root cause.
        raise HTTPException(500, "Failed to save email address") from exc
    return {"stored": True, "path": key}