anycalib-gpu / app.py
SebRincon's picture
Fix: graceful GPU/CPU fallback, remove spaces dependency requirement
15e4e8c verified
"""
AnyCalib — Full-Resolution Camera Calibration & Lens Correction
Gradio Space running the full AnyCalib pipeline:
1. DINOv2 ViT-L/14 backbone → LightDPT decoder → ConvexTangentDecoder head
2. RANSAC + Gauss-Newton calibrator → camera intrinsics [f, cx, cy, k1, ...]
3. Full-resolution undistortion via grid_sample
No resolution limits. No quantization. Full FP32 inference.
Runs on GPU if available (ZeroGPU / dedicated), falls back to CPU.
"""
from __future__ import annotations
import json
import time
import gradio as gr
import numpy as np
import torch
# ── GPU decorator (works on ZeroGPU Spaces, no-op elsewhere) ──
# Importing `spaces` or building the decorator may fail for reasons other than
# a missing package, and none of those failures should stop the app from
# starting. The fallback is therefore deliberately broad: `except Exception`
# already covers `ImportError`, so listing both is redundant.
try:
    import spaces

    gpu_decorator = spaces.GPU(duration=120)
except Exception:
    # Not on a ZeroGPU Space — use identity decorator

    def gpu_decorator(fn):
        """Return ``fn`` unchanged (no-op stand-in for ``spaces.GPU``)."""
        return fn
# ── Load model at startup ──
# The project imports stay here, after the GPU-decorator setup, exactly where
# the original layout has them.
from anycalib.model.anycalib_pretrained import AnyCalib
from anycalib.cameras.factory import CameraFactory

print("[anycalib] Loading model...")
t0 = time.time()

# One shared model instance for the whole process; request handlers move it to
# the right device on demand.
MODEL = AnyCalib(model_id="anycalib_gen")
MODEL.eval()

# Total number of scalar parameters, reported once in the startup log.
TOTAL_PARAMS = sum(param.numel() for param in MODEL.parameters())
print(
    "[anycalib] Model loaded in "
    f"{time.time() - t0:.1f}s ({TOTAL_PARAMS:,} params)"
)
def _build_undistort_grid(camera, params, h, w, scale=1.0, target_proj="perspective"):
"""Build undistortion sampling grid (mirrors AnyCalibRunner._undistort_grid)."""
params_b = params[None, ...] if params.ndim == 1 else params
num_f = int(camera.NUM_F)
f = params_b[..., None, :num_f]
c = params_b[..., None, num_f:num_f + 2]
im_coords = camera.pixel_grid_coords(h, w, params_b, 0.0).reshape(-1, 2)
im_n = (im_coords - c) / f
r = torch.linalg.norm(im_n, dim=-1) / scale
theta = camera.ideal_unprojection(r, target_proj)
phi = torch.atan2(im_n[..., 1], im_n[..., 0])
R = torch.sin(theta)
rays = torch.stack((R * torch.cos(phi), R * torch.sin(phi), torch.cos(theta)), dim=-1)
params_proj = params_b
if num_f == 2:
params_proj = params_b.clone()
params_proj[..., :2] = f.amax(dim=-1, keepdim=True)
map_xy, valid = camera.project(params_proj, rays)
if valid is not None:
valid = valid.reshape(1, h, w)[0]
grid = 2.0 * map_xy.reshape(1, h, w, 2) / map_xy.new_tensor((w, h)) - 1.0
return grid, valid
def _sync_device(device):
    """Wait for queued CUDA work on ``device`` to finish (no-op on CPU)."""
    if device.type == "cuda":
        torch.cuda.synchronize(device)


def _fov_degrees(extent_px, focal_px):
    """Return the pinhole field of view in degrees, or 0.0 for a non-positive focal."""
    if not focal_px > 0:
        return 0.0
    return float(2 * np.degrees(np.arctan(extent_px / (2 * focal_px))))


def _classify_distortion(k1):
    """Map the first distortion coefficient to the label shown in the report."""
    if k1 < -0.001:
        return "Barrel (k1 < 0)"
    if k1 > 0.001:
        return "Pincushion (k1 > 0)"
    return "Negligible"


@gpu_decorator
@torch.no_grad()
def run_calibration(
    input_image: np.ndarray,
    cam_id: str,
    scale: float,
    target_proj: str,
    padding_mode: str,
    interp_mode: str,
    k1_threshold: float,
):
    """Full pipeline: predict -> fit -> undistort at original resolution.

    Args:
        input_image: Image array indexed as ``[row, col, channel]``; it is
            divided by 255 before inference. ``None`` means nothing was
            uploaded.
        cam_id: Camera-model identifier passed to ``MODEL.predict`` and
            ``CameraFactory.create_from_id``.
        scale: Forwarded to ``_build_undistort_grid``.
        target_proj: Forwarded to ``_build_undistort_grid``.
        padding_mode: ``padding_mode`` argument of ``grid_sample``.
        interp_mode: ``mode`` argument of ``grid_sample``.
        k1_threshold: When positive and ``|k1|`` is below it, undistortion is
            skipped and the input image is returned unchanged.

    Returns:
        ``(corrected, params_md, raw_json)``: the output image as a ``uint8``
        array, a Markdown report, and the same data as a JSON string.

    Raises:
        gr.Error: If ``input_image`` is ``None``.
    """
    if input_image is None:
        raise gr.Error("Please upload an image.")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    MODEL.to(device)
    h, w = input_image.shape[:2]
    t_total = time.time()

    # Preprocess: scale to [0, 1] and move the channel axis first.
    x = input_image.astype("float32") / 255.0
    x = np.transpose(x, (2, 0, 1))
    x_t = torch.from_numpy(x).to(device)

    # Neural network inference. CUDA kernels run asynchronously, so the device
    # is synchronized around the timed region; otherwise the wall-clock reading
    # may only cover the kernel launches.
    _sync_device(device)
    t0 = time.time()
    out = MODEL.predict(x_t, cam_id=cam_id)
    _sync_device(device)
    t_infer = time.time() - t0
    intrinsics = out["intrinsics"]
    pred_size = out.get("pred_size")
    # NOTE(review): the type of pred_size is not visible here. A tensor would
    # break json.dumps below, so convert defensively.
    if isinstance(pred_size, torch.Tensor):
        pred_size = pred_size.tolist()

    # Parse intrinsics: [focal(s)..., cx, cy, k1, ...]. Flattening is a no-op
    # for the expected 1-D vector and tolerates a leading batch axis of one.
    camera = CameraFactory.create_from_id(cam_id)
    num_f = int(camera.NUM_F)
    intr_list = (
        intrinsics.detach().cpu().numpy().astype(np.float64).reshape(-1).tolist()
    )
    focal = intr_list[:num_f]
    cx_val, cy_val = intr_list[num_f], intr_list[num_f + 1]
    k1_val = intr_list[num_f + 2] if len(intr_list) > num_f + 2 else 0.0

    # Two-focal models carry (fx, fy): the vertical FOV must use fy, not fx.
    # For single-focal models both names refer to the same value.
    f_px = focal[0]
    fy_px = focal[-1]
    fov_h = _fov_degrees(w, f_px)
    fov_v = _fov_degrees(h, fy_px)
    focal_txt = ", ".join(f"{value:.2f}" for value in focal)

    dist_type = _classify_distortion(k1_val)

    skip_undistort = k1_threshold > 0 and abs(k1_val) < k1_threshold
    if skip_undistort:
        corrected = input_image.copy()
        valid_frac = 1.0
        t_undistort = 0.0
    else:
        _sync_device(device)
        t0 = time.time()
        grid, valid = _build_undistort_grid(
            camera, intrinsics, h, w,
            scale=scale, target_proj=target_proj,
        )
        y_t = torch.nn.functional.grid_sample(
            x_t[None, ...], grid,
            mode=interp_mode,
            padding_mode=padding_mode,
            align_corners=False,
        )
        _sync_device(device)
        t_undistort = time.time() - t0
        valid_frac = float(valid.float().mean().item()) if valid is not None else 1.0
        y = y_t[0].clamp(0, 1).detach().cpu().numpy()
        y = np.transpose(y, (1, 2, 0))
        # +0.5 rounds to nearest before the truncating uint8 cast.
        corrected = (y * 255.0 + 0.5).astype("uint8")

    t_total_elapsed = time.time() - t_total
    hw_label = "GPU" if device.type == "cuda" else "CPU"

    # The report text starts at column 0 so Markdown does not treat the tables
    # as an indented code block.
    params_md = f"""
### Camera Intrinsics
| Parameter | Value |
|-----------|-------|
| **Focal length** | `{focal_txt}` px |
| **Principal point** | `({cx_val:.2f}, {cy_val:.2f})` px |
| **Distortion k1** | `{k1_val:.6f}` |
| **Distortion type** | {dist_type} |
| **FOV (horizontal)** | `{fov_h:.1f}` deg |
| **FOV (vertical)** | `{fov_v:.1f}` deg |
| **Valid pixel fraction** | `{valid_frac:.3f}` |
| **k1 gated (skipped)** | `{skip_undistort}` |
### Image Info
| Property | Value |
|----------|-------|
| **Input resolution** | `{w} x {h}` ({w*h:,} px) |
| **Model working size** | `{pred_size}` |
| **Camera model** | `{cam_id}` |
| **Scale** | `{scale}` |
| **Target projection** | `{target_proj}` |
### Timing ({hw_label})
| Stage | Time |
|-------|------|
| Neural net inference | `{t_infer*1000:.0f}` ms |
| Undistortion (grid_sample) | `{t_undistort*1000:.0f}` ms |
| **Total** | **`{t_total_elapsed*1000:.0f}` ms** |
| Hardware | `{device}` ({hw_label}) |
"""

    raw_json = json.dumps({
        "intrinsics": {
            "focal_length_px": focal,
            "principal_point": [cx_val, cy_val],
            "k1": k1_val,
        },
        "fov": {"horizontal_deg": fov_h, "vertical_deg": fov_v},
        "distortion": {"type": dist_type, "k1_gated": skip_undistort},
        "image": {
            "input_resolution": [w, h],
            "total_pixels": w * h,
            "model_working_size": pred_size,
        },
        "camera": {
            "model": cam_id,
            "scale": scale,
            "target_projection": target_proj,
            "padding_mode": padding_mode,
            "interpolation": interp_mode,
        },
        "quality": {
            "valid_pixel_fraction": valid_frac,
        },
        "timing_ms": {
            "neural_net": round(t_infer * 1000, 1),
            "undistortion": round(t_undistort * 1000, 1),
            "total": round(t_total_elapsed * 1000, 1),
        },
        "device": str(device),
        "all_intrinsics_raw": intr_list,
    }, indent=2)
    return corrected, params_md, raw_json
# ── Gradio UI ──
# Layout: input image, settings and the run button on the left, the corrected
# image on the right, then the parameter report and raw JSON side by side.
# Markdown text is kept at column 0 so it renders the same regardless of how
# the component treats leading whitespace.
with gr.Blocks() as demo:
    gr.Markdown("""
# AnyCalib — Full-Resolution Camera Calibration
Single-image lens calibration & distortion correction powered by
[AnyCalib](https://github.com/javrtg/AnyCalib) (DINOv2 ViT-L/14 + LightDPT + ConvexTangentDecoder, ~320M params).
Full FP32 inference, no quantization, no resolution limits. Automatically uses GPU when available.
Upload any image and get the **corrected (undistorted) image** at original resolution,
plus camera intrinsics, FOV, distortion parameters, and timing.
""")
    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(
                label="Input Image",
                type="numpy",
                sources=["upload", "clipboard"],
            )
            with gr.Accordion("Advanced Settings", open=False):
                cam_id = gr.Dropdown(
                    label="Camera Model",
                    choices=[
                        "simple_division:1",
                        "division:1",
                        "simple_radial:1",
                        "simple_kb:1",
                        "simple_pinhole",
                        "pinhole",
                    ],
                    value="simple_division:1",
                )
                scale = gr.Slider(
                    label="Focal Length Scale (< 1 = wider FOV, less crop)",
                    minimum=0.5, maximum=1.5, step=0.05, value=1.0,
                )
                target_proj = gr.Dropdown(
                    label="Target Projection",
                    choices=["perspective", "stereographic", "equidistant", "equisolid", "orthographic"],
                    value="perspective",
                )
                padding_mode = gr.Dropdown(
                    label="Padding Mode",
                    choices=["border", "zeros", "reflection"],
                    value="border",
                )
                interp_mode = gr.Dropdown(
                    label="Interpolation",
                    choices=["bilinear", "bicubic", "nearest"],
                    value="bilinear",
                )
                k1_threshold = gr.Slider(
                    label="k1 Threshold (skip undistortion if |k1| below this)",
                    minimum=0.0, maximum=0.1, step=0.005, value=0.0,
                )
            run_btn = gr.Button("Run Calibration", variant="primary", size="lg")
        with gr.Column(scale=1):
            output_image = gr.Image(label="Corrected (Undistorted) Image", type="numpy")
    with gr.Row():
        with gr.Column():
            params_output = gr.Markdown(label="Camera Parameters")
        with gr.Column():
            json_output = gr.Code(label="Raw JSON Output", language="json")
    gr.Markdown("""
---
### How it works
1. **Upload** any image (phone photo, action cam, drone, dashcam, etc.)
2. The model predicts per-pixel **ray directions** using a DINOv2 ViT-L/14 backbone
3. **RANSAC + Gauss-Newton** calibrator fits camera intrinsics `[f, cx, cy, k1]` from the rays
4. Image is **undistorted at full resolution** via differentiable grid_sample
5. All parameters and raw JSON output are displayed
### Links
- Raw weights: [SebRincon/anycalib](https://huggingface.co/SebRincon/anycalib) (safetensors)
- ONNX models: [SebRincon/anycalib-onnx](https://huggingface.co/SebRincon/anycalib-onnx) (FP32/FP16/INT8)
- WASM demo: [SebRincon/anycalib-wasm](https://huggingface.co/spaces/SebRincon/anycalib-wasm) (browser-only)
- Source: [github.com/javrtg/AnyCalib](https://github.com/javrtg/AnyCalib)
""")

    # The button and a change of the input image run the same pipeline with
    # the same components, so the wiring is declared once.
    # NOTE(review): clearing the image presumably also fires `change`, in which
    # case run_calibration raises its "Please upload an image." error; confirm
    # that this is the intended UX.
    calibration_inputs = [
        input_image, cam_id, scale, target_proj,
        padding_mode, interp_mode, k1_threshold,
    ]
    calibration_outputs = [output_image, params_output, json_output]
    for register_listener in (run_btn.click, input_image.change):
        register_listener(
            fn=run_calibration,
            inputs=calibration_inputs,
            outputs=calibration_outputs,
        )

if __name__ == "__main__":
    demo.launch()