Spaces:
Sleeping
Sleeping
| """ | |
| generate.py | |
| ─────────────────────────────────────────────────────────────────────────────── | |
| Text-to-motion generation. | |
| Primary backend: MoMask inference server running on the Vast.ai instance. | |
| Returns [T, 263] HumanML3D features directly — no SMPL | |
| body mesh required. | |
| Fallback backend: HumanML3D dataset keyword search (offline / no GPU needed). | |
| Usage | |
| ───── | |
| from Retarget.generate import generate_motion | |
| # Use MoMask on instance | |
| motion = generate_motion("a person walks forward", | |
| backend_url="http://ssh4.vast.ai:8765") | |
| # Local fallback (streams HuggingFace dataset) | |
| motion = generate_motion("a person walks forward") | |
| # Returned motion: np.ndarray [T, 263] | |
| # Feed directly to animate_glb() | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import numpy as np | |
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Public API | |
| # ────────────────────────────────────────────────────────────────────────────── | |
def generate_motion(
    prompt: str,
    backend_url: str | None = None,
    num_frames: int = 196,
    fps: float = 20.0,
    seed: int = -1,
) -> np.ndarray:
    """
    Generate a HumanML3D [T, 263] motion array from a text prompt.

    When ``backend_url`` is given, the MoMask inference server is tried
    first. If that call raises for any reason, the failure is printed and
    the dataset keyword search is used instead. Without a ``backend_url``
    the dataset search is used directly.

    Parameters
    ----------
    prompt
        Natural language description of the desired motion.
        Examples: "a person walks forward", "someone does a jumping jack",
        "a man waves hello with his right hand"
    backend_url
        URL of the MoMask inference server. E.g. "http://ssh4.vast.ai:8765".
        If None (or empty), or if the server call fails, falls back to
        dataset search.
    num_frames
        Desired clip length in frames (at 20 fps; max ~196 ≈ 9.8 s).
        Only forwarded to the MoMask server; the dataset fallback does not
        receive it.
    fps
        Target fps (MoMask natively produces 20 fps).
        NOTE: accepted for interface compatibility, but this function does
        not currently read it.
    seed
        Random seed for reproducibility (-1 = random).
        Only forwarded to the MoMask server.

    Returns
    -------
    np.ndarray  shape [T, 263]  HumanML3D feature vector.

    Raises
    ------
    Any exception raised by the dataset fallback propagates to the caller
    (e.g. ``RuntimeError`` when the search finds no matching motion).
    """
    if backend_url:
        try:
            return _call_momask(prompt, backend_url, num_frames, seed)
        except Exception as exc:
            # Deliberate best-effort boundary: whatever goes wrong with the
            # server (network, bad JSON, bad shape) degrades to the offline
            # search below instead of failing the request.
            print(f"[generate] MoMask unreachable ({exc}) — falling back to dataset search")
    return _dataset_search_fallback(prompt)
| # ────────────────────────────────────────────────────────────────────────────── | |
| # MoMask backend | |
| # ────────────────────────────────────────────────────────────────────────────── | |
def _call_momask(
    prompt: str,
    url: str,
    num_frames: int,
    seed: int,
) -> np.ndarray:
    """
    Request a motion clip from the MoMask inference server.

    Sends ``prompt``, ``num_frames`` and ``seed`` as a JSON body in a POST
    to ``<url>/generate`` (trailing slashes on ``url`` are stripped first)
    and converts the ``"motion"`` entry of the JSON reply into a float32
    array.

    Returns
    -------
    np.ndarray  2-D float32 array with at least 193 columns.

    Raises
    ------
    ValueError
        If the decoded array is not 2-D or has fewer than 193 columns.
    Errors from the HTTP request, JSON decoding, or a missing ``"motion"``
    key are not caught here and propagate to the caller.
    """
    from urllib.request import Request, urlopen

    endpoint = url.rstrip("/") + "/generate"
    body = json.dumps(
        {"prompt": prompt, "num_frames": num_frames, "seed": seed}
    ).encode("utf-8")
    request = Request(
        endpoint,
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    # The request is abandoned if the server has not answered in 180 s.
    with urlopen(request, timeout=180) as response:
        reply = json.loads(response.read())

    frames = np.array(reply["motion"], dtype=np.float32)
    # Accept only a frames-by-features matrix with at least 193 features.
    well_formed = frames.ndim == 2 and frames.shape[1] >= 193
    if not well_formed:
        raise ValueError(f"Server returned unexpected shape {frames.shape}")

    print(f"[generate] MoMask: {frames.shape[0]} frames for '{prompt}'")
    return frames
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Dataset search fallback | |
| # ────────────────────────────────────────────────────────────────────────────── | |
def _dataset_search_fallback(prompt: str) -> np.ndarray:
    """
    Return the best keyword-search match for ``prompt`` from the dataset.

    Offline substitute for MoMask: queries the TeoGchx/HumanML3D dataset
    (streamed from HuggingFace) through ``search_motions`` with
    ``top_k=5``, ``split="test"`` and ``max_scan=500``, then returns the
    first result's ``"motion"`` entry as a float32 array.

    Raises
    ------
    RuntimeError
        If the search returns no results.
    """
    from .search import format_choice_label, search_motions

    print(f"[generate] Searching HumanML3D dataset for: '{prompt}'")
    candidates = search_motions(prompt, top_k=5, split="test", max_scan=500)

    if candidates:
        # The first entry is used as the best match.
        top_hit = candidates[0]
        print(f"[generate] Best match: {format_choice_label(top_hit)}")
        return np.array(top_hit["motion"], dtype=np.float32)

    raise RuntimeError(
        f"No motion found in dataset for prompt: {prompt!r}\n"
        "Check your internet connection or deploy MoMask on the instance."
    )