krystv committed on
Commit
02e8800
·
verified ·
1 Parent(s): 04ada50

Add smoke_test.py — 25 comprehensive CPU tests

Browse files
Files changed (1) hide show
  1. smoke_test.py +241 -0
smoke_test.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Comprehensive smoke test for LiquidFlow.
3
+ Tests: all model sizes, forward/backward, gradient health,
4
+ loss convergence direction, sampling, checkpoint save/load.
5
+ NO actual training — just confirms everything is wired correctly.
6
+ """
7
+ import sys, os, json, tempfile
8
+ sys.path.insert(0, '/app')
9
+
10
+ import torch
11
+ import torch.nn as nn
12
+ from liquidflow.model import (
13
+ liquidflow_tiny, liquidflow_small, liquidflow_base, liquidflow_512,
14
+ LiquidCfCCell, SelectiveSSM, LiquidSSMBlock, create_scan_patterns
15
+ )
16
+ from liquidflow.losses import PhysicsInformedFlowLoss, EMAModel
17
+ from liquidflow.sampling import euler_sample, heun_sample, make_grid_image
18
+
# Global pass/fail tally for the whole smoke run.
PASS = 0
FAIL = 0


def check(name, condition):
    """Record one named assertion: bump the global tally and print its status."""
    global PASS, FAIL
    ok = bool(condition)
    if ok:
        PASS += 1
    else:
        FAIL += 1
    marker = "✅" if ok else "❌"
    print(f" {marker} {name}")
# =========================================================
print("=" * 60)
print("1. MODEL VARIANTS — forward pass + shapes")
print("=" * 60)

# (label, factory, image size, batch size) — smaller batches for big models.
configs = [
    ("tiny-128", liquidflow_tiny, 128, 2),
    ("small-128", liquidflow_small, 128, 2),
    ("base-256", liquidflow_base, 256, 1),
    ("512", liquidflow_512, 512, 1),
]

for tag, factory, img_sz, bs in configs:
    m = factory(img_size=img_sz)
    p = m.count_params()
    x = torch.randn(bs, 3, img_sz, img_sz)
    t = torch.rand(bs)
    v = m(x, t)
    # Velocity field output must have the same shape as the input image batch.
    check(f"{tag}: {p/1e6:.1f}M params, output shape {v.shape}",
          v.shape == x.shape)
# =========================================================
print("\n" + "=" * 60)
print("2. BACKWARD PASS — gradients exist for every param")
print("=" * 60)

m = liquidflow_tiny(32)
x1 = torch.randn(2, 3, 32, 32)
x0 = torch.randn(2, 3, 32, 32)
t = torch.rand(2)
t_e = t.view(2, 1, 1, 1)
# Flow-matching interpolant between noise x0 and data x1 at time t.
x_t = t_e * x1 + (1 - t_e) * x0
v = m(x_t, t)
loss_fn = PhysicsInformedFlowLoss()
loss, ld = loss_fn(v, x0, x1, t, step=100)
loss.backward()

# Any trainable parameter that stayed grad-less is disconnected from the graph.
no_grad_params = [name for name, p in m.named_parameters()
                  if p.requires_grad and p.grad is None]
check("All parameters receive gradients", not no_grad_params)
if no_grad_params:
    print(f" Missing grads: {no_grad_params[:5]}...")
# =========================================================
print("\n" + "=" * 60)
print("3. GRADIENT HEALTH — no NaN, no Inf, reasonable norms")
print("=" * 60)

# Collect the populated gradients once instead of filtering three times.
grads = [p.grad for p in m.parameters() if p.grad is not None]
has_nan = any(torch.isnan(g).any() for g in grads)
has_inf = any(torch.isinf(g).any() for g in grads)
max_grad = max(g.abs().max().item() for g in grads)

check("No NaN gradients", not has_nan)
check("No Inf gradients", not has_inf)
check(f"Max grad norm reasonable ({max_grad:.4f} < 100)", max_grad < 100)
# =========================================================
print("\n" + "=" * 60)
print("4. LOSS CONVERGENCE DIRECTION — 3 optimizer steps")
print("=" * 60)

m2 = liquidflow_tiny(32)
opt = torch.optim.AdamW(m2.parameters(), lr=1e-3)
losses_track = []
for step in range(3):
    x1 = torch.randn(4, 3, 32, 32)
    x0 = torch.randn(4, 3, 32, 32)
    t = torch.rand(4)
    t_e = t.view(4, 1, 1, 1)
    x_t = t_e * x1 + (1 - t_e) * x0
    loss, _ = loss_fn(m2(x_t, t), x0, x1, t, step=step)
    opt.zero_grad()
    loss.backward()
    opt.step()
    losses_track.append(loss.item())

# l == l rejects NaN; the magnitude bound rejects Inf and exploding losses.
check(f"Loss finite across steps: {[f'{l:.4f}' for l in losses_track]}",
      all(l == l and abs(l) <= 1e6 for l in losses_track))
# =========================================================
print("\n" + "=" * 60)
print("5. INDIVIDUAL COMPONENTS")
print("=" * 60)

# Lazy factories keep construction interleaved with the forward calls,
# preserving the original order of RNG consumption.
component_factories = [
    ("LiquidCfCCell", lambda: LiquidCfCCell(64, 64)),
    ("SelectiveSSM", lambda: SelectiveSSM(64, d_state=8)),
    ("LiquidSSMBlock", lambda: LiquidSSMBlock(64, d_state=8)),
]
for label, make in component_factories:
    out = make()(torch.randn(2, 16, 64))
    check(f"{label}: input (2,16,64) → output {out.shape}", out.shape == (2, 16, 64))

# Scan patterns: four orderings over an 8x8 token grid (64 positions each).
patterns, inv = create_scan_patterns(8, 8)
check(f"Scan patterns: {len(patterns)} patterns of length {len(patterns[0])}",
      len(patterns) == 4 and len(patterns[0]) == 64)

# Verify scan ↔ unscan is identity
dummy = torch.arange(64)
for i, (p, ip) in enumerate(zip(patterns, inv)):
    recovered = dummy[p][ip]
    check(f"Scan pattern {i}: scan→unscan is identity", torch.equal(recovered, dummy))
# =========================================================
print("\n" + "=" * 60)
print("6. SAMPLING — Euler & Heun produce valid images")
print("=" * 60)

m3 = liquidflow_tiny(32)
m3.eval()

with torch.no_grad():
    imgs_euler = euler_sample(m3, (4, 3, 32, 32), num_steps=5)
    check(f"Euler sample shape {imgs_euler.shape}, finite",
          imgs_euler.shape == (4, 3, 32, 32) and torch.isfinite(imgs_euler).all())

    imgs_heun = heun_sample(m3, (4, 3, 32, 32), num_steps=5)
    check(f"Heun sample shape {imgs_heun.shape}, finite",
          imgs_heun.shape == (4, 3, 32, 32) and torch.isfinite(imgs_heun).all())

    # Map the samples from [-1, 1] into [0, 1] before rendering the grid.
    normed = imgs_euler.clamp(-1, 1) * 0.5 + 0.5
    grid = make_grid_image(normed, nrow=2)
    grid.save('/app/smoke_test_grid.png')
    check(f"Grid image saved ({grid.size})", grid.size[0] > 0)
# =========================================================
print("\n" + "=" * 60)
print("7. EMA — shadow copy matches, save/load works")
print("=" * 60)

m4 = liquidflow_tiny(32)
ema = EMAModel(m4, decay=0.999)
for _ in range(2):
    ema.update(m4)
ema.apply_shadow(m4)
# After apply, model params should be close to shadow
ema.restore(m4)
check("EMA apply/restore cycle completes", True)

sd = ema.state_dict()
check("EMA state_dict has shadow and step",
      'shadow' in sd and 'step' in sd)
# =========================================================
print("\n" + "=" * 60)
print("8. CHECKPOINT — save & reload matches")
print("=" * 60)

m5 = liquidflow_tiny(32)
opt5 = torch.optim.AdamW(m5.parameters(), lr=1e-3)
ckpt = {
    'model': m5.state_dict(),
    'optimizer': opt5.state_dict(),
    'epoch': 5,
    'global_step': 100,
}
# tempfile.mktemp() is deprecated and race-prone (the name can be claimed
# between generation and use); mkstemp atomically creates the file instead.
fd, tmp = tempfile.mkstemp(suffix='.pt')
os.close(fd)  # torch.save reopens by path; release the raw descriptor
try:
    torch.save(ckpt, tmp)

    m6 = liquidflow_tiny(32)
    loaded = torch.load(tmp, map_location='cpu', weights_only=False)
    m6.load_state_dict(loaded['model'])
    check("Checkpoint save/load cycle works", loaded['epoch'] == 5)
finally:
    # Always clean up the temp file, even if save/load raised.
    os.remove(tmp)
# =========================================================
print("\n" + "=" * 60)
print("9. PHYSICS LOSS COMPONENTS — each term finite & positive")
print("=" * 60)

x_fake = torch.randn(2, 3, 32, 32)
lf = PhysicsInformedFlowLoss(lambda_smooth=0.01, lambda_tv=0.001)
# Both regularizer terms should evaluate to finite, strictly positive scalars.
for label, term in (("Smoothness loss", lf.smoothness_loss(x_fake)),
                    ("TV loss", lf.total_variation_loss(x_fake))):
    check(f"{label}: {term.item():.4f} (finite, positive)",
          torch.isfinite(term) and term.item() > 0)
# =========================================================
print("\n" + "=" * 60)
print("10. MEMORY FOOTPRINT SUMMARY")
print("=" * 60)

variants = (
    ("tiny-32", liquidflow_tiny, 32),
    ("tiny-128", liquidflow_tiny, 128),
    ("small-128", liquidflow_small, 128),
    ("base-256", liquidflow_base, 256),
    ("512", liquidflow_512, 512),
)
for tag, factory, img_sz in variants:
    net = factory(img_size=img_sz)
    n_params = net.count_params()
    # Rough fp16-training footprint estimates (weights + AdamW states).
    model_gb = n_params * 2 / 1e9  # fp16 params
    opt_gb = n_params * 8 / 1e9    # optimizer states (fp32 momentum + variance)
    tokens = (img_sz // net.patch_size) ** 2
    print(f" {tag:12s}: {n_params/1e6:6.1f}M params | "
          f"model={model_gb*1000:.0f}MB | opt={opt_gb*1000:.0f}MB | "
          f"tokens={tokens:5d} | patch={net.patch_size}")
# =========================================================
print("\n" + "=" * 60)
print(f"RESULTS: {PASS} passed, {FAIL} failed")
print("=" * 60)
# Nonzero exit code signals failure to CI / calling scripts.
sys.exit(1 if FAIL else 0)