| """ |
| CfC Cell — Closed-form Continuous-time neural network cell. |
| |
| From: "Closed-form Continuous-time Neural Networks" (Hasani et al., 2022) |
| |
| The CfC model provides an approximate closed-form solution to Liquid Time-Constant (LTC) |
| network dynamics without needing ODE solvers. |
| |
| Architecture: |
| x(t) = σ(-f(x,I;θ_f) · t) ⊙ g(x,I;θ_g) + (1 - σ(-f(x,I;θ_f) · t)) ⊙ h(x,I;θ_h) |
| |
| Where: |
| - f, g, h are neural network heads sharing a backbone |
| - σ is the sigmoid (replacing exponential decay for gradient stability) |
| - t is a time parameter |
| - The sigmoidal terms act as time-continuous gates between g and h |
| |
| Key properties: |
| - No ODE solving → 100x+ faster than Neural ODEs |
| - Time-continuous gating mechanism → adaptive computation |
| - Closed-form → stable gradients, easy to train |
| - Naturally causal → good for sequential processing |
| |
| For 2D image inputs: we treat the spatial sequence as "time" steps for the CfC, |
| allowing the liquid dynamics to model spatial dependencies with adaptive gates. |
| """ |
|
|
| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
|
|
|
|
class CfCCell(nn.Module):
    """
    Single CfC cell with a shared backbone and three heads (f, g, h).

    Implements the closed-form continuous-time update

        h_new = sigma(-f * t) * g + (1 - sigma(-f * t)) * h + skip

    where f, g, h are computed from a shared backbone over [x, h_prev]
    and the sigmoid term acts as a time-continuous gate.

    Args:
        dim: Hidden dimension.
        backbone_dropout: Dropout probability in the backbone layers.
        time_scale: Range (a, b) for the time parameter. During training,
            t is drawn uniformly from [a, b]; during evaluation the fixed
            midpoint (a + b) / 2 is used so inference is deterministic.
        use_conv: Add a depthwise Conv1d applied to each head's features.
            NOTE(review): in `_step` the conv sees a length-1 sequence, so
            with kernel_size=3 and zero padding only the center tap
            contributes — effectively a learned per-channel affine.
    """

    def __init__(self, dim, backbone_dropout=0.0, time_scale=(0.0, 1.0), use_conv=True):
        super().__init__()
        self.dim = dim
        self.time_scale = time_scale

        # Shared backbone over [x, h_prev]; outputs 4 chunks: f, g, h bases + skip.
        backbone_dim = dim * 3
        self.backbone = nn.Sequential(
            nn.Linear(dim + dim, backbone_dim),
            nn.LayerNorm(backbone_dim),
            nn.SiLU(),
            nn.Dropout(backbone_dropout),
            nn.Linear(backbone_dim, dim * 4),
            nn.LayerNorm(dim * 4),
        )

        # Depthwise conv (groups=dim) for per-channel local mixing of head inputs.
        self.conv = nn.Conv1d(dim, dim, kernel_size=3, padding=1, groups=dim) if use_conv else None

        # f gates between g and h; tanh keeps the decay rate bounded.
        self.f_head = nn.Sequential(nn.Linear(dim, dim), nn.LayerNorm(dim), nn.Tanh())
        self.g_head = nn.Sequential(nn.Linear(dim, dim), nn.LayerNorm(dim), nn.GELU())
        self.h_head = nn.Sequential(nn.Linear(dim, dim), nn.LayerNorm(dim), nn.GELU())

        self.out_proj = nn.Linear(dim, dim)
        self._init_weights()

    def _init_weights(self):
        """Init all Linear layers with truncated-style normal(std=0.02), zero bias."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, std=0.02)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def _sample_t(self, shape, device):
        """Return a time parameter of the given shape.

        Uniform in [time_scale[0], time_scale[1]] while training; the fixed
        midpoint at eval time so inference is deterministic (the original
        code sampled randomly in eval mode too, making outputs irreproducible).
        """
        lo, hi = self.time_scale
        if self.training:
            return torch.rand(*shape, device=device) * (hi - lo) + lo
        return torch.full(shape, (lo + hi) / 2.0, device=device)

    def forward(self, x, h_prev=None, t=None):
        """
        Args:
            x: [B, dim] single step or [B, L, dim] sequence.
            h_prev: Previous hidden state [B, dim] (zeros if None).
            t: Optional time parameter tensor; sampled via `_sample_t` if None.
        Returns:
            h: [B, dim] or [B, L, dim], matching the input layout.
        """
        if x.dim() == 3:
            return self._forward_seq(x, h_prev, t)

        B, device = x.shape[0], x.device
        if h_prev is None:
            h_prev = torch.zeros(B, self.dim, device=device)
        if t is None:
            t = self._sample_t((B, 1), device)
        elif t.dim() == 1:
            t = t.unsqueeze(1)  # [B] -> [B, 1] for broadcasting against [B, dim]

        return self._step(x, h_prev, t)

    def _forward_seq(self, x, h_prev=None, t=None):
        """Unroll the cell over a [B, L, dim] sequence, one step at a time."""
        B, L, D = x.shape
        device = x.device

        if t is None:
            t = self._sample_t((B, 1), device)  # one t shared across all steps
        elif t.dim() == 3:
            t = t.squeeze(-1)  # [B, 1, 1] -> [B, 1]; hoisted out of the loop

        outputs = []
        h = torch.zeros(B, D, device=device) if h_prev is None else h_prev
        for step in range(L):
            h = self._step(x[:, step, :], h, t)
            outputs.append(h)
        return torch.stack(outputs, dim=1)

    def _step(self, x, h_prev, t):
        """Core CfC step: backbone -> (f, g, h, skip) -> gated combination."""
        combined = torch.cat([x, h_prev], dim=-1)
        backbone_out = self.backbone(combined)
        f_base, g_base, h_base, skip = backbone_out.chunk(4, dim=-1)

        if self.conv is not None:
            # [B, dim] -> [B, dim, 1] so Conv1d treats features as channels.
            f_base = f_base + self.conv(f_base.unsqueeze(-1)).squeeze(-1)
            g_base = g_base + self.conv(g_base.unsqueeze(-1)).squeeze(-1)
            h_base = h_base + self.conv(h_base.unsqueeze(-1)).squeeze(-1)

        f_out = self.f_head(f_base)
        g_out = self.g_head(g_base)
        h_out = self.h_head(h_base)

        # Time-continuous gate: sigmoid replaces exponential decay for stability.
        gate = torch.sigmoid(-f_out * t)
        h = gate * g_out + (1 - gate) * h_out + skip
        return self.out_proj(h)
|
|
|
|
class CfCBlock(nn.Module):
    """CfC block for 2D image processing with residual connection.

    Accepts either a [B, C, H, W] feature map (flattened to a length-H*W
    sequence) or a pre-flattened [B, L, C] sequence. Learned positional
    embeddings support sequences up to ``max_len`` tokens.

    Args:
        dim: Channel / feature dimension.
        dropout: Dropout used in the CfC backbone and the feed-forward.
        time_scale: Time-parameter range forwarded to ``CfCCell``.
        expansion_factor: Width multiplier for the feed-forward hidden layer.
        max_len: Maximum supported sequence length (H * W). Previously a
            hard-coded 4096; inputs longer than this used to fail with a
            cryptic broadcasting error — now raises a clear ValueError.
    """

    def __init__(self, dim, dropout=0.0, time_scale=(0.0, 1.0), expansion_factor=2, max_len=4096):
        super().__init__()
        self.dim = dim
        self.max_len = max_len
        self.norm1 = nn.LayerNorm(dim)
        self.norm2 = nn.LayerNorm(dim)
        self.cfc = CfCCell(dim=dim, backbone_dropout=dropout, time_scale=time_scale, use_conv=True)

        ff_dim = dim * expansion_factor
        self.ff = nn.Sequential(
            nn.Linear(dim, ff_dim), nn.GELU(), nn.Dropout(dropout),
            nn.Linear(ff_dim, dim), nn.Dropout(dropout),
        )

        # Learned positional embeddings, sliced to the actual sequence length.
        self.pos_embed = nn.Parameter(torch.randn(1, max_len, dim) * 0.02)

    def forward(self, x, return_2d=True):
        """
        Args:
            x: [B, C, H, W] feature map or [B, L, C] token sequence.
            return_2d: If the input was 4D, reshape the output back to
                [B, C, H, W]; otherwise return the [B, L, C] sequence.
        Returns:
            Tensor with the same layout as the input (subject to return_2d).
        Raises:
            ValueError: If the sequence length exceeds ``max_len``.
        """
        is_2d = x.dim() == 4
        if is_2d:
            B, C, H, W = x.shape
            L = H * W
            x = x.flatten(2).transpose(1, 2)  # [B, C, H, W] -> [B, L, C]
        else:
            B, L, C = x.shape

        if L > self.max_len:
            raise ValueError(
                f"sequence length {L} exceeds max_len={self.max_len} "
                f"supported by the positional embeddings"
            )

        # Positions are added only to the CfC input; the residual is position-free.
        x_with_pos = x + self.pos_embed[:, :L, :]
        residual = x
        h = self.cfc(self.norm1(x_with_pos))
        x_out = h + self.ff(self.norm2(h + residual))

        if is_2d and return_2d:
            x_out = x_out.transpose(1, 2).reshape(B, C, H, W)
        return x_out
|
|