Charlie81 committed on
Commit d5537e3 · 1 Parent(s): 62fa95e

patch quantize

Files changed (1)
  1. quantization.py +11 -7
quantization.py CHANGED
@@ -80,22 +80,26 @@ class MixedPrecisionQuantizer:
         if bits == 32:
             return layer
 
-        weight = layer.weight.data
-        bias = layer.bias.data if layer.bias is not None else None
+        weight = layer.weight.data.clone()
 
         # Symmetric quantization
         qmin = -(2 ** (bits - 1))
         qmax = 2 ** (bits - 1) - 1
 
-        # Calculate scale
-        max_val = torch.max(torch.abs(weight))
+        # Calculate scale per-channel (per output channel)
+        # This provides better accuracy than per-tensor quantization
+        max_val = torch.max(torch.abs(weight), dim=1, keepdim=True)[0]
+        max_val = torch.clamp(max_val, min=1e-5)  # Avoid division by zero
         scale = max_val / qmax
 
-        # Quantize
+        # Quantize and dequantize (fake quantization)
         weight_q = torch.clamp(torch.round(weight / scale), qmin, qmax)
+        weight_dq = weight_q * scale
 
-        # Store quantized weights and scale
-        layer.weight.data = weight_q.to(torch.int8 if bits <= 8 else torch.int16)
+        # Store dequantized weights as float (required for autograd)
+        layer.weight.data = weight_dq.contiguous()
+
+        # Store quantization metadata as layer attributes
         layer.weight_scale = scale
         layer.quantized = True
         layer.bits = bits
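
For context, here is a minimal self-contained sketch of the per-channel symmetric fake quantization this patch introduces, applied to a torch.nn.Linear layer. The function name fake_quantize_linear and the bits=8 usage are illustrative assumptions; the commit does not show the enclosing method's signature. Note that keeping the weights dequantized in float (instead of casting to int8/int16 as the old code did) keeps autograd and ordinary float kernels working, at the cost of no memory savings at rest.

import torch
import torch.nn as nn

def fake_quantize_linear(layer: nn.Linear, bits: int = 8) -> nn.Linear:
    # Illustrative helper, not part of this commit.
    if bits == 32:
        return layer  # full precision: nothing to do

    weight = layer.weight.data.clone()

    # Symmetric signed integer range, e.g. [-128, 127] for 8 bits
    qmin = -(2 ** (bits - 1))
    qmax = 2 ** (bits - 1) - 1

    # One scale per output channel (one per row of the weight matrix)
    max_val = torch.max(torch.abs(weight), dim=1, keepdim=True)[0]
    max_val = torch.clamp(max_val, min=1e-5)  # avoid division by zero
    scale = max_val / qmax

    # Round to the integer grid, then map back to float (fake quantization)
    weight_q = torch.clamp(torch.round(weight / scale), qmin, qmax)
    layer.weight.data = (weight_q * scale).contiguous()

    # Quantization metadata, mirroring the attributes set in the patch
    layer.weight_scale = scale
    layer.quantized = True
    layer.bits = bits
    return layer

# Usage (illustrative):
layer = nn.Linear(16, 4)
fake_quantize_linear(layer, bits=8)
print(layer.weight_scale.shape)  # torch.Size([4, 1]): one scale per output channel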