HuggingFaceM4
/

tiny-random-siglip

Zero-Shot Image Classification

Inference Endpoints

Model card Files Files and versions Community

VictorSanh committed on Jan 31

Commit

2ca24ef

•

1 Parent(s): 9947e3f

ops in fp16

Files changed (1) hide show

modeling_siglip.py +11 -4

modeling_siglip.py CHANGED Viewed

@@ -95,10 +95,11 @@ def _trunc_normal_(tensor, mean, std, a, b):
     # Use inverse cdf transform for normal distribution to get truncated
     # standard normal
-    if tensor.dtype == torch.bfloat16:
         tensor = tensor.to(torch.float32)
         tensor.erfinv_()
-        tensor = tensor.to(torch.bfloat16)
     else:
         tensor.erfinv_()
@@ -107,7 +108,13 @@ def _trunc_normal_(tensor, mean, std, a, b):
     tensor.add_(mean)
     # Clamp to ensure it's in the proper range
-    tensor.clamp_(min=a, max=b)
 def trunc_normal_tf_(
@@ -732,7 +739,7 @@ class SiglipPreTrainedModel(PreTrainedModel):
             nn.init.normal_(module.attention.in_proj_weight.data)
             nn.init.zeros_(module.attention.in_proj_bias.data)
         elif isinstance(module, SiglipModel):
-            logit_scale_init = torch.log(torch.tensor(1.0))
             module.logit_scale.data.fill_(logit_scale_init)
             module.logit_bias.data.zero_()
         elif isinstance(module, (nn.Linear, nn.Conv2d)):

     # Use inverse cdf transform for normal distribution to get truncated
     # standard normal
+    if tensor.dtype == torch.float16:
+        # The `erfinv_` op is not (yet?) defined in float16
         tensor = tensor.to(torch.float32)
         tensor.erfinv_()
+        tensor = tensor.to(torch.float16)
     else:
         tensor.erfinv_()
     tensor.add_(mean)
     # Clamp to ensure it's in the proper range
+    if tensor.dtype == torch.float16:
+        # The `clamp_` op is not (yet?) defined in float16
+        tensor = tensor.to(torch.float32)
+        tensor.clamp_(min=a, max=b)
+        tensor = tensor.to(torch.float16)
+    else:
+        tensor.clamp_(min=a, max=b)
 def trunc_normal_tf_(
             nn.init.normal_(module.attention.in_proj_weight.data)
             nn.init.zeros_(module.attention.in_proj_bias.data)
         elif isinstance(module, SiglipModel):
+            logit_scale_init = torch.tensor(0.0)
             module.logit_scale.data.fill_(logit_scale_init)
             module.logit_bias.data.zero_()
         elif isinstance(module, (nn.Linear, nn.Conv2d)):