File size: 5,103 Bytes
1e6fe0a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
"""
π― core-dino | DINO-style Loss Functions π₯
Defines the cross-view contrastive loss used in DINO setups,
including temperature scaling, centering, and teacher-student divergence.
Includes:
- DinoSpatialLoss: Temp-scaled CE loss with center momentum π
- DinoSinkhornSpatialLoss: Sinkhorn-based balanced assignment loss βοΈ
Author: Gajesh Ladhar
π LinkedIn: https://www.linkedin.com/in/gajeshladhar/
π€ Hugging Face: https://huggingface.co/gajeshladhar
"""
import torch
from torch import nn
import torch.nn.functional as F
class DinoSpatialLoss(nn.Module):
    """
    DINO loss using temperature-scaled cross-entropy over spatial tokens.

    Aligns teacher & student spatial features (B, C, H, W). The teacher
    logits are centered with an EMA "center" (computed over teacher
    logits, as in the reference DINO implementation) for stability.

    Args:
        teacher_temp (float): Temperature for teacher softmax.
        student_temp (float): Temperature for student softmax.
        center_momentum (float): EMA factor for center update.
    """

    def __init__(self, teacher_temp=0.04, student_temp=0.1, center_momentum=0.9):
        super().__init__()
        self.teacher_temp = teacher_temp
        self.student_temp = student_temp
        self.center_momentum = center_momentum
        # Lazily resized to (1, C) on the first forward, once the feature
        # dimension is known.
        self.register_buffer("center", torch.zeros(1, 1))
        # Explicit flag instead of a shape check, so C == 1 features do
        # not re-zero the center on every forward.
        self._center_initialized = False

    def forward(self, student_feat, teacher_feat):
        """
        Compute loss over (B, C, H, W) features.

        Args:
            student_feat (Tensor): Student output, shape (B, C, Hs, Ws)
            teacher_feat (Tensor): Teacher output, shape (B, C, Ht, Wt)

        Returns:
            Tensor: Scalar DINO loss
        """
        # Lazy init: size the center to the teacher feature dim, on the
        # teacher's device/dtype.
        if not self._center_initialized:
            self.center = teacher_feat.new_zeros(1, teacher_feat.shape[1])
            self._center_initialized = True

        # Resize student to teacher resolution
        student_resized = F.interpolate(
            student_feat, size=teacher_feat.shape[2:],
            mode='bilinear', align_corners=False,
        )

        # Flatten spatial dims: (B, C, H, W) -> (B*H*W, C)
        B, C, H, W = student_resized.shape
        student_flat = student_resized.permute(0, 2, 3, 1).reshape(-1, C)  # (BHW, C)
        teacher_flat = teacher_feat.permute(0, 2, 3, 1).reshape(-1, C)     # (BHW, C)

        # Teacher is centered (in logit space) and sharpened; no gradient
        # flows through the teacher targets.
        student_log_probs = F.log_softmax(student_flat / self.student_temp, dim=-1)
        teacher_probs = F.softmax(
            (teacher_flat - self.center) / self.teacher_temp, dim=-1
        ).detach()

        # Cross-entropy between teacher targets and student predictions
        loss = -(teacher_probs * student_log_probs).sum(dim=-1).mean()

        # EMA center update. Bug fix: the center is subtracted from the raw
        # teacher LOGITS above, so it must be an EMA of the logits themselves
        # (as in the reference DINO implementation), not of the post-softmax
        # probabilities — mixing the two spaces makes the centering
        # ineffective.
        with torch.no_grad():
            batch_center = teacher_flat.mean(dim=0, keepdim=True)
            self.center = (
                self.center * self.center_momentum
                + batch_center * (1 - self.center_momentum)
            )
        return loss
class SinkhornKnopp(nn.Module):
    """
    Sinkhorn-Knopp normalization for balanced soft assignments.

    Alternates row and column normalization of exp(logits) so the mass is
    balanced across columns (prototypes), then finishes with a row
    normalization so every row is a valid probability distribution —
    required because callers use the output as a cross-entropy target.

    Args:
        num_iters (int): Number of row/column normalization iterations
        eps (float): Stabilizer to avoid div-by-zero
    """

    def __init__(self, num_iters: int = 3, eps: float = 1e-6):
        super().__init__()
        self.num_iters = num_iters
        self.eps = eps

    def forward(self, logits: torch.Tensor) -> torch.Tensor:
        """
        Args:
            logits (Tensor): (N, C) unnormalized scores.

        Returns:
            Tensor: (N, C) non-negative matrix whose rows sum to ~1 and
            whose columns are approximately balanced.
        """
        # Subtract per-row max before exp for numerical stability.
        logits = logits - logits.max(dim=1, keepdim=True)[0]
        Q = torch.exp(logits)  # exp already allocates a new tensor; no clone needed
        Q = Q / Q.sum()
        for _ in range(self.num_iters):
            Q = Q / (Q.sum(dim=1, keepdim=True) + self.eps)  # row normalization
            Q = Q / (Q.sum(dim=0, keepdim=True) + self.eps)  # column normalization
        # Bug fix: finish on the ROW step (as SwAV/DINO finish on the sample
        # dimension). Previously the loop ended on the column step, leaving
        # each row summing to ~C/N instead of 1, so the output was not a
        # per-token probability distribution.
        Q = Q / (Q.sum(dim=1, keepdim=True) + self.eps)
        return Q
class DinoSinkhornSpatialLoss(nn.Module):
    """
    DINO loss with Sinkhorn-balanced teacher targets — no centering needed,
    since the Sinkhorn step already balances the assignments.

    Args:
        student_temp (float): Temperature for student softmax
        sinkhorn_iters (int): Iterations for Sinkhorn normalization
    """

    def __init__(self, student_temp=0.1, sinkhorn_iters=3):
        super().__init__()
        self.student_temp = student_temp
        self.sinkhorn = SinkhornKnopp(sinkhorn_iters)

    def forward(self, student_feat, teacher_feat):
        """
        Args:
            student_feat (Tensor): (B, C, Hs, Ws) student features.
            teacher_feat (Tensor): (B, C, Ht, Wt) teacher features.

        Returns:
            Tensor: Scalar cross-entropy loss.
        """
        # Bring the student map onto the teacher's spatial grid first.
        upsampled = F.interpolate(
            student_feat, size=teacher_feat.shape[2:],
            mode='bilinear', align_corners=False
        )

        # (B, C, H, W) -> (B*H*W, C): one row per spatial token.
        channels = upsampled.shape[1]
        tokens_student = upsampled.permute(0, 2, 3, 1).reshape(-1, channels)
        tokens_teacher = teacher_feat.permute(0, 2, 3, 1).reshape(-1, channels)

        # Balanced teacher targets via Sinkhorn (no temperature, no center);
        # gradients never flow into the teacher.
        targets = self.sinkhorn(tokens_teacher).detach()

        # Temperature-sharpened student log-probabilities.
        log_preds = F.log_softmax(tokens_student / self.student_temp, dim=-1)

        # Per-token cross-entropy, averaged over all tokens.
        return -(targets * log_preds).sum(dim=-1).mean()