Jack Wu committed on
Commit ·
ebe8a5c
1
Parent(s): 4f8616d
feat: introduce checkpoint mirroring script, strengthen video validation, and improve pipeline robustness for masking and compositing.
Browse files- app.py +45 -3
- pipeline/composite.py +9 -1
- pipeline/crop.py +18 -5
- pipeline/lama.py +8 -2
- pipeline/video.py +41 -6
- requirements.txt +1 -1
- scripts/mirror_checkpoints.py +138 -0
app.py
CHANGED
|
@@ -265,7 +265,25 @@ def _meta_to_dict(meta: VideoMeta) -> dict:
|
|
| 265 |
|
| 266 |
|
| 267 |
def _dict_to_meta(d: dict) -> VideoMeta:
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
|
| 271 |
# ---------------------------------------------------------------------------
|
|
@@ -281,13 +299,23 @@ def on_video_upload(video_path: str | None):
|
|
| 281 |
meta = probe(video_path)
|
| 282 |
|
| 283 |
# ── Input validation — guard against disk exhaustion on ZeroGPU ──
|
| 284 |
-
MAX_DURATION_S =
|
| 285 |
MAX_PIXELS = 1920 * 1080
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
if meta.duration_s > MAX_DURATION_S:
|
| 287 |
return (
|
| 288 |
gr.update(), gr.update(), None,
|
| 289 |
f"β Clip too long ({meta.duration_s:.1f}s). Max {MAX_DURATION_S:.0f} seconds.",
|
| 290 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
if meta.width * meta.height > MAX_PIXELS:
|
| 292 |
return (
|
| 293 |
gr.update(), gr.update(), None,
|
|
@@ -481,7 +509,11 @@ def run_pipeline(
|
|
| 481 |
)
|
| 482 |
|
| 483 |
with VideoWorkspace() as ws:
|
| 484 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
shutil.copy2(video_path, safe_video)
|
| 486 |
|
| 487 |
# ── Extract frames (CFR-forced for VFR safety) ─────────────────
|
|
@@ -490,6 +522,16 @@ def run_pipeline(
|
|
| 490 |
total = len(frame_paths)
|
| 491 |
|
| 492 |
# ── GPU: inpaint + composite + save ────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
progress(0.15, desc="Starting inpaintingβ¦")
|
| 494 |
_inpaint_composite_save_gpu(
|
| 495 |
frame_paths, crop_region, inpaint_mask,
|
|
|
|
| 265 |
|
| 266 |
|
| 267 |
def _dict_to_meta(d: dict) -> VideoMeta:
|
| 268 |
+
"""Reconstruct a VideoMeta from a dict, ignoring unknown keys.
|
| 269 |
+
|
| 270 |
+
The dict lives in Gradio ``State`` and may contain extra fields if a
|
| 271 |
+
user has a cached session from an older or newer version of the app.
|
| 272 |
+
Passing ``**d`` directly would raise ``TypeError`` on unexpected keys.
|
| 273 |
+
"""
|
| 274 |
+
return VideoMeta(
|
| 275 |
+
width=d["width"],
|
| 276 |
+
height=d["height"],
|
| 277 |
+
fps=d["fps"],
|
| 278 |
+
frame_count=d["frame_count"],
|
| 279 |
+
duration_s=d["duration_s"],
|
| 280 |
+
color_primaries=d.get("color_primaries"),
|
| 281 |
+
color_trc=d.get("color_trc"),
|
| 282 |
+
colorspace=d.get("colorspace"),
|
| 283 |
+
color_range=d.get("color_range"),
|
| 284 |
+
codec_name=d.get("codec_name", "unknown"),
|
| 285 |
+
bit_depth=d.get("bit_depth", 8),
|
| 286 |
+
)
|
| 287 |
|
| 288 |
|
| 289 |
# ---------------------------------------------------------------------------
|
|
|
|
| 299 |
meta = probe(video_path)
|
| 300 |
|
| 301 |
# ── Input validation — guard against disk exhaustion on ZeroGPU ──
|
| 302 |
+
MAX_DURATION_S = 15.0
|
| 303 |
MAX_PIXELS = 1920 * 1080
|
| 304 |
+
# Max frames caps videos where ffprobe returns N/A for duration
|
| 305 |
+
# (VFR/container-less formats). duration_s would be 0.0 after our guard,
|
| 306 |
+
# so the duration check alone would pass an arbitrarily long clip.
|
| 307 |
+
MAX_FRAMES = round(MAX_DURATION_S * max(meta.fps, 1.0))
|
| 308 |
if meta.duration_s > MAX_DURATION_S:
|
| 309 |
return (
|
| 310 |
gr.update(), gr.update(), None,
|
| 311 |
f"β Clip too long ({meta.duration_s:.1f}s). Max {MAX_DURATION_S:.0f} seconds.",
|
| 312 |
)
|
| 313 |
+
if meta.frame_count > MAX_FRAMES:
|
| 314 |
+
return (
|
| 315 |
+
gr.update(), gr.update(), None,
|
| 316 |
+
f"β Clip too long ({meta.frame_count} frames at {meta.fps:.2f} fps). "
|
| 317 |
+
f"Max {MAX_DURATION_S:.0f} seconds.",
|
| 318 |
+
)
|
| 319 |
if meta.width * meta.height > MAX_PIXELS:
|
| 320 |
return (
|
| 321 |
gr.update(), gr.update(), None,
|
|
|
|
| 509 |
)
|
| 510 |
|
| 511 |
with VideoWorkspace() as ws:
|
| 512 |
+
# Preserve the original file extension so FFmpeg can detect the container
|
| 513 |
+
# format. Gradio always adds an extension for video uploads, but fall back
|
| 514 |
+
# to .mp4 if the path somehow has none.
|
| 515 |
+
src_suffix = Path(video_path).suffix or ".mp4"
|
| 516 |
+
safe_video = ws.path("source" + src_suffix)
|
| 517 |
shutil.copy2(video_path, safe_video)
|
| 518 |
|
| 519 |
# ── Extract frames (CFR-forced for VFR safety) ─────────────────
|
|
|
|
| 522 |
total = len(frame_paths)
|
| 523 |
|
| 524 |
# ── GPU: inpaint + composite + save ────────────────────────────
|
| 525 |
+
# Validate mode on CPU before acquiring GPU so unimplemented modes
|
| 526 |
+
# fail fast without burning ZeroGPU quota.
|
| 527 |
+
_VALID_MODES = ("Fast (LaMa)", "Quality (VACE-14B)")
|
| 528 |
+
if mode not in _VALID_MODES:
|
| 529 |
+
raise gr.Error(f"Unknown mode '{mode}'. Choose from: {_VALID_MODES}")
|
| 530 |
+
if mode == "Quality (VACE-14B)":
|
| 531 |
+
raise gr.Error(
|
| 532 |
+
"VACE-14B quality mode is not yet available. "
|
| 533 |
+
"Please select Fast (LaMa)."
|
| 534 |
+
)
|
| 535 |
progress(0.15, desc="Starting inpaintingβ¦")
|
| 536 |
_inpaint_composite_save_gpu(
|
| 537 |
frame_paths, crop_region, inpaint_mask,
|
pipeline/composite.py
CHANGED
|
@@ -184,11 +184,19 @@ def composite_frames(
|
|
| 184 |
Composited full-frame images (uint8 RGB), one per input frame.
|
| 185 |
"""
|
| 186 |
alpha = feathered_alpha(inpaint_mask, feather_radius)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
return [
|
| 188 |
composite_with_alpha(
|
| 189 |
np.array(Image.open(fp).convert("RGB")), crop, crop_region, alpha
|
| 190 |
)
|
| 191 |
-
for fp, crop in zip(
|
| 192 |
]
|
| 193 |
|
| 194 |
|
|
|
|
| 184 |
Composited full-frame images (uint8 RGB), one per input frame.
|
| 185 |
"""
|
| 186 |
alpha = feathered_alpha(inpaint_mask, feather_radius)
|
| 187 |
+
# Materialise to list so a generator argument isn't consumed by len()
|
| 188 |
+
# before the zip iteration below.
|
| 189 |
+
frame_paths = list(original_frame_paths)
|
| 190 |
+
if len(frame_paths) != len(inpainted_crops):
|
| 191 |
+
raise ValueError(
|
| 192 |
+
f"composite_frames: {len(frame_paths)} frame paths but "
|
| 193 |
+
f"{len(inpainted_crops)} crops β lengths must match."
|
| 194 |
+
)
|
| 195 |
return [
|
| 196 |
composite_with_alpha(
|
| 197 |
np.array(Image.open(fp).convert("RGB")), crop, crop_region, alpha
|
| 198 |
)
|
| 199 |
+
for fp, crop in zip(frame_paths, inpainted_crops)
|
| 200 |
]
|
| 201 |
|
| 202 |
|
pipeline/crop.py
CHANGED
|
@@ -169,6 +169,7 @@ def mask_to_bbox(mask: np.ndarray) -> BBox:
|
|
| 169 |
----------
|
| 170 |
mask : np.ndarray
|
| 171 |
Single-channel mask, dtype uint8. Non-zero pixels = drawn area.
|
|
|
|
| 172 |
|
| 173 |
Returns
|
| 174 |
-------
|
|
@@ -178,8 +179,13 @@ def mask_to_bbox(mask: np.ndarray) -> BBox:
|
|
| 178 |
Raises
|
| 179 |
------
|
| 180 |
ValueError
|
| 181 |
-
If the mask contains no drawn pixels.
|
| 182 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
if mask.ndim == 3:
|
| 184 |
# Accept RGB/RGBA — collapse to single channel
|
| 185 |
mask = mask.max(axis=2)
|
|
@@ -318,9 +324,10 @@ def compute_crop_region(
|
|
| 318 |
target_w = min(target_w, frame_w)
|
| 319 |
target_h = min(target_h, frame_h)
|
| 320 |
|
| 321 |
-
# Round down to multiple of 32 after clamping
|
| 322 |
-
|
| 323 |
-
|
|
|
|
| 324 |
|
| 325 |
# ------------------------------------------------------------------
|
| 326 |
# 3. Centre on watermark centroid, then clamp to frame bounds
|
|
@@ -394,7 +401,13 @@ def build_inpaint_mask(
|
|
| 394 |
y2 = crop_region.frame_y + crop_region.frame_h
|
| 395 |
x1 = crop_region.frame_x
|
| 396 |
x2 = crop_region.frame_x + crop_region.frame_w
|
| 397 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
else:
|
| 399 |
# Fill the watermark bbox rectangle
|
| 400 |
b = crop_region.mask_bbox
|
|
|
|
| 169 |
----------
|
| 170 |
mask : np.ndarray
|
| 171 |
Single-channel mask, dtype uint8. Non-zero pixels = drawn area.
|
| 172 |
+
2D (H×W) or 3D (H×W×C) arrays are accepted; 4D+ is rejected.
|
| 173 |
|
| 174 |
Returns
|
| 175 |
-------
|
|
|
|
| 179 |
Raises
|
| 180 |
------
|
| 181 |
ValueError
|
| 182 |
+
If the mask contains no drawn pixels, or is 4-dimensional or higher.
|
| 183 |
"""
|
| 184 |
+
if mask.ndim > 3:
|
| 185 |
+
raise ValueError(
|
| 186 |
+
f"mask_to_bbox: expected a 2D or 3D mask array, got ndim={mask.ndim}. "
|
| 187 |
+
"Pass a single-frame HΓW or HΓWΓC numpy array."
|
| 188 |
+
)
|
| 189 |
if mask.ndim == 3:
|
| 190 |
# Accept RGB/RGBA — collapse to single channel
|
| 191 |
mask = mask.max(axis=2)
|
|
|
|
| 324 |
target_w = min(target_w, frame_w)
|
| 325 |
target_h = min(target_h, frame_h)
|
| 326 |
|
| 327 |
+
# Round down to multiple of 32 after clamping.
|
| 328 |
+
# max(..., 32) ensures we never produce a 0-dim crop for very small frames.
|
| 329 |
+
target_w = max(_floor_to_multiple(target_w, 32), 32)
|
| 330 |
+
target_h = max(_floor_to_multiple(target_h, 32), 32)
|
| 331 |
|
| 332 |
# ------------------------------------------------------------------
|
| 333 |
# 3. Centre on watermark centroid, then clamp to frame bounds
|
|
|
|
| 401 |
y2 = crop_region.frame_y + crop_region.frame_h
|
| 402 |
x1 = crop_region.frame_x
|
| 403 |
x2 = crop_region.frame_x + crop_region.frame_w
|
| 404 |
+
cropped = source_mask[y1:y2, x1:x2]
|
| 405 |
+
# Binarise: any non-zero value (including raw Gradio layer values
|
| 406 |
+
# like 200 that are not exactly 255) becomes 255.
|
| 407 |
+
binarised = (cropped > 0).astype(np.uint8) * 255
|
| 408 |
+
# Guard: source_mask smaller than crop region causes cropped to be
|
| 409 |
+
# smaller than (frame_h, frame_w). Copy into a zero-padded full mask.
|
| 410 |
+
mask[: binarised.shape[0], : binarised.shape[1]] = binarised
|
| 411 |
else:
|
| 412 |
# Fill the watermark bbox rectangle
|
| 413 |
b = crop_region.mask_bbox
|
pipeline/lama.py
CHANGED
|
@@ -149,5 +149,11 @@ def _load_crop(frame_path: Path, crop_region: CropRegion) -> np.ndarray:
|
|
| 149 |
|
| 150 |
|
| 151 |
def _mask_to_pil(mask: np.ndarray) -> Image.Image:
|
| 152 |
-
"""Convert a uint8 numpy mask to a PIL L-mode image for LaMa.
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
|
| 151 |
def _mask_to_pil(mask: np.ndarray) -> Image.Image:
|
| 152 |
+
"""Convert a 2D uint8 numpy mask to a PIL L-mode image for LaMa.
|
| 153 |
+
|
| 154 |
+
``Image.fromarray`` on a 2D uint8 array automatically produces mode ``'L'``
|
| 155 |
+
(the format LaMa / simple-lama-inpainting expects for the mask input).
|
| 156 |
+
The explicit ``mode="L"`` argument is omitted to avoid the Pillow 13
|
| 157 |
+
deprecation warning for the type-coercion overload of that parameter.
|
| 158 |
+
"""
|
| 159 |
+
return Image.fromarray(mask) # 2D uint8 β 'L' automatically
|
pipeline/video.py
CHANGED
|
@@ -91,7 +91,12 @@ def probe(video_path: str | Path) -> VideoMeta:
|
|
| 91 |
|
| 92 |
# Duration: prefer stream-level, fall back to format-level
|
| 93 |
dur_str = video_stream.get("duration") or data.get("format", {}).get("duration", "0")
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
# Frame count
|
| 97 |
nb_frames = video_stream.get("nb_frames")
|
|
@@ -114,9 +119,17 @@ def probe(video_path: str | Path) -> VideoMeta:
|
|
| 114 |
except (ValueError, TypeError):
|
| 115 |
bit_depth = _bd_from_pix_fmt()
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
return VideoMeta(
|
| 118 |
-
width=int(
|
| 119 |
-
height=int(
|
| 120 |
fps=fps,
|
| 121 |
frame_count=frame_count,
|
| 122 |
duration_s=duration_s,
|
|
@@ -181,7 +194,12 @@ def extract_frames(
|
|
| 181 |
cmd.append(str(out_dir / pattern))
|
| 182 |
_run(cmd)
|
| 183 |
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
if not frames:
|
| 186 |
raise RuntimeError(f"No frames extracted from {video_path} into {out_dir}")
|
| 187 |
return frames
|
|
@@ -282,14 +300,22 @@ def frames_to_video(
|
|
| 282 |
vid_codec, pix_fmt = "libx264", "yuv420p"
|
| 283 |
extra_codec_flags = []
|
| 284 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
cmd = [
|
| 286 |
"ffmpeg",
|
| 287 |
"-y",
|
| 288 |
"-framerate", _fps_str(meta.fps),
|
|
|
|
| 289 |
"-i", str(Path(frames_dir) / pattern),
|
| 290 |
"-c:v", vid_codec,
|
| 291 |
"-preset", "slow",
|
| 292 |
"-crf", str(crf),
|
|
|
|
| 293 |
"-pix_fmt", pix_fmt,
|
| 294 |
*extra_codec_flags,
|
| 295 |
*color_flags,
|
|
@@ -377,7 +403,10 @@ class VideoWorkspace:
|
|
| 377 |
|
| 378 |
def __exit__(self, *args) -> None:
|
| 379 |
if self._tmpdir:
|
| 380 |
-
|
|
|
|
|
|
|
|
|
|
| 381 |
|
| 382 |
def path(self, name: str) -> Path:
|
| 383 |
"""Return a path inside the workspace root."""
|
|
@@ -394,6 +423,8 @@ def _run(cmd: list[str]) -> subprocess.CompletedProcess:
|
|
| 394 |
cmd,
|
| 395 |
capture_output=True,
|
| 396 |
text=True,
|
|
|
|
|
|
|
| 397 |
)
|
| 398 |
if result.returncode != 0:
|
| 399 |
raise RuntimeError(
|
|
@@ -415,6 +446,10 @@ def _parse_rational(rat: str) -> float:
|
|
| 415 |
|
| 416 |
def _fps_str(fps: float) -> str:
|
| 417 |
"""Convert fps float to a clean string for FFmpeg -framerate."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
# Keep common exact fractions (24000/1001, 30000/1001, etc.)
|
| 419 |
common = {
|
| 420 |
23.976: "24000/1001",
|
|
@@ -424,7 +459,7 @@ def _fps_str(fps: float) -> str:
|
|
| 424 |
119.88: "120000/1001", # 120p (S1II high-frame-rate mode)
|
| 425 |
}
|
| 426 |
for approx, s in common.items():
|
| 427 |
-
if abs(fps - approx) < 0.
|
| 428 |
return s
|
| 429 |
return f"{fps:.6g}"
|
| 430 |
|
|
|
|
| 91 |
|
| 92 |
# Duration: prefer stream-level, fall back to format-level
|
| 93 |
dur_str = video_stream.get("duration") or data.get("format", {}).get("duration", "0")
|
| 94 |
+
try:
|
| 95 |
+
duration_s = float(dur_str)
|
| 96 |
+
except (ValueError, TypeError):
|
| 97 |
+
# ffprobe emits "N/A" for duration on VFR / container-less formats;
|
| 98 |
+
# fall back to 0.0 — frame_count will still be set from nb_frames.
|
| 99 |
+
duration_s = 0.0
|
| 100 |
|
| 101 |
# Frame count
|
| 102 |
nb_frames = video_stream.get("nb_frames")
|
|
|
|
| 119 |
except (ValueError, TypeError):
|
| 120 |
bit_depth = _bd_from_pix_fmt()
|
| 121 |
|
| 122 |
+
width = video_stream.get("width")
|
| 123 |
+
height = video_stream.get("height")
|
| 124 |
+
if not width or not height:
|
| 125 |
+
raise RuntimeError(
|
| 126 |
+
f"Video stream in {video_path} has no width/height β "
|
| 127 |
+
"the file may be corrupt or contain only audio."
|
| 128 |
+
)
|
| 129 |
+
|
| 130 |
return VideoMeta(
|
| 131 |
+
width=int(width),
|
| 132 |
+
height=int(height),
|
| 133 |
fps=fps,
|
| 134 |
frame_count=frame_count,
|
| 135 |
duration_s=duration_s,
|
|
|
|
| 194 |
cmd.append(str(out_dir / pattern))
|
| 195 |
_run(cmd)
|
| 196 |
|
| 197 |
+
# Filter to sequentially named PNGs only (FFmpeg writes purely numeric
|
| 198 |
+
# names, but a failed run might leave a non-numeric file behind).
|
| 199 |
+
frames = sorted(
|
| 200 |
+
(p for p in out_dir.glob("*.png") if p.stem.isdigit()),
|
| 201 |
+
key=lambda p: int(p.stem),
|
| 202 |
+
)
|
| 203 |
if not frames:
|
| 204 |
raise RuntimeError(f"No frames extracted from {video_path} into {out_dir}")
|
| 205 |
return frames
|
|
|
|
| 300 |
vid_codec, pix_fmt = "libx264", "yuv420p"
|
| 301 |
extra_codec_flags = []
|
| 302 |
|
| 303 |
+
# Build even-dimension filter.
|
| 304 |
+
# yuv420p / yuv420p10le require both width and height to be divisible by 2.
|
| 305 |
+
# The source video can have odd dimensions (some encoders emit 1919×1079 etc.).
|
| 306 |
+
# scale=trunc(iw/2)*2:trunc(ih/2)*2 is the standard FFmpeg idiom for this.
|
| 307 |
+
even_filter = "scale=trunc(iw/2)*2:trunc(ih/2)*2"
|
| 308 |
+
|
| 309 |
cmd = [
|
| 310 |
"ffmpeg",
|
| 311 |
"-y",
|
| 312 |
"-framerate", _fps_str(meta.fps),
|
| 313 |
+
"-start_number", "1", # explicit: frame files start at 000001.png
|
| 314 |
"-i", str(Path(frames_dir) / pattern),
|
| 315 |
"-c:v", vid_codec,
|
| 316 |
"-preset", "slow",
|
| 317 |
"-crf", str(crf),
|
| 318 |
+
"-vf", even_filter,
|
| 319 |
"-pix_fmt", pix_fmt,
|
| 320 |
*extra_codec_flags,
|
| 321 |
*color_flags,
|
|
|
|
| 403 |
|
| 404 |
def __exit__(self, *args) -> None:
|
| 405 |
if self._tmpdir:
|
| 406 |
+
try:
|
| 407 |
+
self._tmpdir.cleanup()
|
| 408 |
+
except Exception: # PermissionError on Windows/macOS antivirus lock
|
| 409 |
+
pass # Best-effort cleanup; the OS will reclaim the temp dir
|
| 410 |
|
| 411 |
def path(self, name: str) -> Path:
|
| 412 |
"""Return a path inside the workspace root."""
|
|
|
|
| 423 |
cmd,
|
| 424 |
capture_output=True,
|
| 425 |
text=True,
|
| 426 |
+
encoding="utf-8",
|
| 427 |
+
errors="replace", # Non-UTF-8 chars in FFmpeg stderr replaced with οΏ½
|
| 428 |
)
|
| 429 |
if result.returncode != 0:
|
| 430 |
raise RuntimeError(
|
|
|
|
| 446 |
|
| 447 |
def _fps_str(fps: float) -> str:
|
| 448 |
"""Convert fps float to a clean string for FFmpeg -framerate."""
|
| 449 |
+
if fps <= 0:
|
| 450 |
+
# Should never happen with valid ffprobe output, but guard against
|
| 451 |
+
# corrupt metadata producing -framerate 0 which makes FFmpeg error.
|
| 452 |
+
return "30"
|
| 453 |
# Keep common exact fractions (24000/1001, 30000/1001, etc.)
|
| 454 |
common = {
|
| 455 |
23.976: "24000/1001",
|
|
|
|
| 459 |
119.88: "120000/1001", # 120p (S1II high-frame-rate mode)
|
| 460 |
}
|
| 461 |
for approx, s in common.items():
|
| 462 |
+
if abs(fps - approx) < 0.015:
|
| 463 |
return s
|
| 464 |
return f"{fps:.6g}"
|
| 465 |
|
requirements.txt
CHANGED
|
@@ -8,7 +8,7 @@
|
|
| 8 |
gradio>=4.44.0,<5.0.0
|
| 9 |
numpy>=1.24.0
|
| 10 |
Pillow>=10.0.0
|
| 11 |
-
scipy>=1.11.0 # mask dilation
|
| 12 |
|
| 13 |
# ββ Fast mode (LaMa) ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 14 |
simple-lama-inpainting>=0.1.2
|
|
|
|
| 8 |
gradio>=4.44.0,<5.0.0
|
| 9 |
numpy>=1.24.0
|
| 10 |
Pillow>=10.0.0
|
| 11 |
+
scipy>=1.11.0 # mask dilation (pipeline/crop.py) + feather blur (pipeline/composite.py)
|
| 12 |
|
| 13 |
# ββ Fast mode (LaMa) ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 14 |
simple-lama-inpainting>=0.1.2
|
scripts/mirror_checkpoints.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# /// script
|
| 2 |
+
# requires-python = ">=3.10"
|
| 3 |
+
# dependencies = [
|
| 4 |
+
# "huggingface_hub>=0.26",
|
| 5 |
+
# "requests>=2.31",
|
| 6 |
+
# ]
|
| 7 |
+
# ///
|
| 8 |
+
"""
|
| 9 |
+
mirror_checkpoints.py
|
| 10 |
+
---------------------
|
| 11 |
+
One-off mirror job: copies the three model dependencies for the
|
| 12 |
+
Video Watermark Remover Space into JackIsNotInTheBox/Video_Watermark_Remover_Checkpoints
|
| 13 |
+
so the Space is insulated from upstream deletion.
|
| 14 |
+
|
| 15 |
+
Sources mirrored:
|
| 16 |
+
1. Wan-AI/Wan2.1-VACE-14B-diffusers (~75 GB, Apache-2.0) → vace-14b/
|
| 17 |
+
2. lightx2v/Wan2.1-Distill-Loras (single LoRA file) → loras/
|
| 18 |
+
3. big-lama.pt from GitHub releases (~196 MB, Apache-2.0) → lama/
|
| 19 |
+
|
| 20 |
+
Strategy
|
| 21 |
+
--------
|
| 22 |
+
Per-file streaming: download → upload → delete. Disk usage at any moment
|
| 23 |
+
is ~one file (max ~5 GB for a single VACE transformer shard), so this fits
|
| 24 |
+
on cpu-basic / cpu-upgrade Jobs without ever holding the full 75 GB locally.
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
import os
|
| 28 |
+
import sys
|
| 29 |
+
from pathlib import Path
|
| 30 |
+
|
| 31 |
+
import requests
|
| 32 |
+
from huggingface_hub import HfApi, hf_hub_download, list_repo_files
|
| 33 |
+
|
| 34 |
+
# ---------------------------------------------------------------------------
|
| 35 |
+
# Config
|
| 36 |
+
# ---------------------------------------------------------------------------
|
| 37 |
+
DEST_REPO = "JackIsNotInTheBox/Video_Watermark_Remover_Checkpoints"
|
| 38 |
+
TOKEN = os.environ.get("HF_TOKEN")
|
| 39 |
+
if not TOKEN:
|
| 40 |
+
sys.exit("HF_TOKEN env var not set; pass via `--secrets HF_TOKEN=...`")
|
| 41 |
+
|
| 42 |
+
WORK = Path("/tmp/mirror")
|
| 43 |
+
WORK.mkdir(parents=True, exist_ok=True)
|
| 44 |
+
|
| 45 |
+
api = HfApi(token=TOKEN)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# ---------------------------------------------------------------------------
|
| 49 |
+
# Helpers
|
| 50 |
+
# ---------------------------------------------------------------------------
|
| 51 |
+
def stream_repo(
|
| 52 |
+
src_repo: str,
|
| 53 |
+
dest_prefix: str,
|
| 54 |
+
src_type: str = "model",
|
| 55 |
+
exclude_globs: list[str] | None = None,
|
| 56 |
+
) -> None:
|
| 57 |
+
"""Mirror every file in src_repo under dest_prefix in DEST_REPO."""
|
| 58 |
+
files = list_repo_files(src_repo, repo_type=src_type, token=TOKEN)
|
| 59 |
+
exclude = exclude_globs or []
|
| 60 |
+
files = [f for f in files if not any(Path(f).match(g) for g in exclude)]
|
| 61 |
+
print(f"\n=== {src_repo} β {dest_prefix}/ ({len(files)} files) ===", flush=True)
|
| 62 |
+
|
| 63 |
+
for i, fname in enumerate(files, 1):
|
| 64 |
+
print(f" [{i:>3}/{len(files)}] {fname}", flush=True)
|
| 65 |
+
local = hf_hub_download(
|
| 66 |
+
repo_id=src_repo,
|
| 67 |
+
repo_type=src_type,
|
| 68 |
+
filename=fname,
|
| 69 |
+
local_dir=str(WORK),
|
| 70 |
+
token=TOKEN,
|
| 71 |
+
)
|
| 72 |
+
api.upload_file(
|
| 73 |
+
path_or_fileobj=local,
|
| 74 |
+
path_in_repo=f"{dest_prefix}/{fname}",
|
| 75 |
+
repo_id=DEST_REPO,
|
| 76 |
+
repo_type="model",
|
| 77 |
+
commit_message=f"Mirror {src_repo}: {fname}",
|
| 78 |
+
)
|
| 79 |
+
Path(local).unlink(missing_ok=True)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def stream_url(url: str, dest_path_in_repo: str, commit_message: str) -> None:
|
| 83 |
+
"""Download a single file from an arbitrary URL, push to DEST_REPO, delete."""
|
| 84 |
+
fname = Path(dest_path_in_repo).name
|
| 85 |
+
print(f"\n=== {url} β {dest_path_in_repo} ===", flush=True)
|
| 86 |
+
local = WORK / fname
|
| 87 |
+
with requests.get(url, stream=True, timeout=300) as r:
|
| 88 |
+
r.raise_for_status()
|
| 89 |
+
with open(local, "wb") as fp:
|
| 90 |
+
for chunk in r.iter_content(chunk_size=1 << 20): # 1 MB chunks
|
| 91 |
+
fp.write(chunk)
|
| 92 |
+
api.upload_file(
|
| 93 |
+
path_or_fileobj=str(local),
|
| 94 |
+
path_in_repo=dest_path_in_repo,
|
| 95 |
+
repo_id=DEST_REPO,
|
| 96 |
+
repo_type="model",
|
| 97 |
+
commit_message=commit_message,
|
| 98 |
+
)
|
| 99 |
+
local.unlink(missing_ok=True)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# ---------------------------------------------------------------------------
|
| 103 |
+
# Main
|
| 104 |
+
# ---------------------------------------------------------------------------
|
| 105 |
+
def main() -> None:
|
| 106 |
+
# 1. VACE-14B (largest — do first while disk is freshest)
|
| 107 |
+
stream_repo(
|
| 108 |
+
"Wan-AI/Wan2.1-VACE-14B-diffusers",
|
| 109 |
+
dest_prefix="vace-14b",
|
| 110 |
+
exclude_globs=["assets/*", ".gitattributes"],
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
# 2. 4-step distill LoRA (single file)
|
| 114 |
+
stream_repo(
|
| 115 |
+
"lightx2v/Wan2.1-Distill-Loras",
|
| 116 |
+
dest_prefix="loras",
|
| 117 |
+
exclude_globs=[
|
| 118 |
+
"*.md",
|
| 119 |
+
".gitattributes",
|
| 120 |
+
# Pull only the rank-64 t2v 4-step LoRA — matches vace.py 8-step plan
|
| 121 |
+
"*i2v*",
|
| 122 |
+
"*rank32*",
|
| 123 |
+
"*rank128*",
|
| 124 |
+
],
|
| 125 |
+
)
|
| 126 |
+
|
| 127 |
+
# 3. LaMa from GitHub release
|
| 128 |
+
stream_url(
|
| 129 |
+
url="https://github.com/enesmsahin/simple-lama-inpainting/releases/download/v0.1.0/big-lama.pt",
|
| 130 |
+
dest_path_in_repo="lama/big-lama.pt",
|
| 131 |
+
commit_message="Mirror big-lama.pt from simple-lama-inpainting v0.1.0 GitHub release",
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
print("\nβ
All mirrors complete.")
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
if __name__ == "__main__":
|
| 138 |
+
main()
|