p1atdev
/

siglip-tagger-test-3

+from PIL import Image
+import torch
+from transformers import (
+    AutoModelForImageClassification,
+    AutoImageProcessor,
+    Pipeline,
+)
+import numpy as np
+from typing import Union
+class SiglipTaggerPipe(Pipeline):
+    def __init__(self,**kwargs):
+      self.processor = AutoImageProcessor.from_pretrained("p1atdev/siglip-tagger-test-3")
+      if "torch_dtype" not in kwargs :
+        kwargs["torch_dtype"] = torch.bfloat16
+      Pipeline.__init__(self,**kwargs)
+    def _sanitize_parameters(self, **kwargs):
+      postprocess_kwargs = {}
+      if "threshold" in kwargs :
+        # if threshold parameter is present
+        # we pass it to the postprocess method
+        postprocess_kwargs["threshold"] = kwargs["threshold"]
+      return {},{},postprocess_kwargs
+    def preprocess(self,inputs: Union[str,Image.Image,np.ndarray]):
+      if isinstance(inputs,str) :
+          img =  Image.open(inputs)
+      elif isinstance(inputs,Image.Image) :
+          img = inputs
+      else :
+        # TODO: double check this implementation
+        # consider adding try except
+        # maybe add url checker too
+        img = Image.fromarray(inputs)
+      inputs = self.processor(img, return_tensors="pt").to(self.model.device, self.model.dtype)
+      return inputs
+    def _forward(self,inputs):
+      logits = self.model(**inputs).logits.detach().cpu().float()[0]
+      logits = np.clip(logits, 0.0, 1.0)
+      return logits
+    def postprocess(self,logits,threshold:float=0):
+      results = {
+          self.model.config.id2label[i]: logit for i, logit in enumerate(logits) if logit > 0
+      }
+      results = sorted(results.items(), key=lambda x: x[1], reverse=True)
+      out = {}
+      for tag, score in results:
+        if score >= threshold :
+          out[tag] = f"{score*100:.2f}"
+      return out

config.json CHANGED Viewed

@@ -1,10 +1,22 @@
 {
-  "_name_or_path": "google/siglip-so400m-patch14-384",
-  "architectures": ["SiglipForImageClassification"],
   "auto_map": {
-    "AutoModelForImageClassification": "modeling_siglip.SiglipForImageClassification"
   },
-  "attention_dropout": 0.0,
   "hidden_act": "gelu_pytorch_tanh",
   "hidden_size": 1152,
   "id2label": {
@@ -19047,13 +19059,13 @@
     "zzz": 9515,
     "|_": 9516
   },
-  "layer_norm_eps": 1e-6,
   "model_type": "siglip_vision_model",
   "num_attention_heads": 16,
   "num_channels": 3,
   "num_hidden_layers": 27,
   "patch_size": 14,
   "problem_type": "multi_label_classification",
-  "torch_dtype": "bfloat16",
   "transformers_version": "4.37.2"
 }

 {
+  "_name_or_path": "p1atdev/siglip-tagger-test-3",
+  "architectures": [
+    "SiglipForImageClassification"
+  ],
+  "attention_dropout": 0.0,
   "auto_map": {
+    "AutoModelForImageClassification": "p1atdev/siglip-tagger-test-3--modeling_siglip.SiglipForImageClassification"
+  },
+  "custom_pipelines": {
+    "image-classification": {
+      "impl": "CustomPipe.SiglipTaggerPipe",
+      "pt": [
+        "AutoModelForImageClassification"
+      ],
+      "tf": [],
+      "type": "image"
+    }
   },
   "hidden_act": "gelu_pytorch_tanh",
   "hidden_size": 1152,
   "id2label": {
     "zzz": 9515,
     "|_": 9516
   },
+  "layer_norm_eps": 1e-06,
   "model_type": "siglip_vision_model",
   "num_attention_heads": 16,
   "num_channels": 3,
   "num_hidden_layers": 27,
   "patch_size": 14,
   "problem_type": "multi_label_classification",
+  "torch_dtype": "float32",
   "transformers_version": "4.37.2"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce46ef29aec79fcf0fbe8280521acb10381ef5000706af5f870c08af781fb3eb
-size 878455682

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c57dce403a3fbb0b10dd311cd84cc12ecbf884ae444f54aa6f941f5fb3e06f7
+size 1756853084