Slep
/

CondViT-B16-txt

Feature Extraction

lrvsf-benchmark

Model card Files Files and versions Community

Slep committed on Apr 9, 2024

Commit

6b537e8

·

verified ·

1 Parent(s): fbea38f

Optional texts in processor.

Files changed (1) hide show

processor.py +10 -9

processor.py CHANGED Viewed

@@ -38,7 +38,7 @@ class CondViTProcessor(ImageProcessingMixin):
         img = F.normalize(img, self.image_mean, self.image_std)
         return img
-    def __call__(self, images, texts):
         """
         Parameters
         ----------
@@ -55,14 +55,15 @@ class CondViTProcessor(ImageProcessingMixin):
             texts : Union[str, List[str]]
         """
         # Single Image
         if isinstance(images, Image.Image):
-            return BatchFeature(
-                data={"pixel_values": self.process_img(images), "texts": texts}
             )
-        return BatchFeature(
-            data={
-                "pixel_values": torch.stack([self.process_img(img) for img in images]),
-                "texts": texts,
-            }
-        )

         img = F.normalize(img, self.image_mean, self.image_std)
         return img
+    def __call__(self, images, texts=None):
         """
         Parameters
         ----------
             texts : Union[str, List[str]]
         """
         # Single Image
+        data = {}
         if isinstance(images, Image.Image):
+            data["pixel_values"] = self.process_img(images)
+        else:
+            data["pixel_values"] = torch.stack(
+                [self.process_img(img) for img in images]
             )
+        if texts is not None:
+            data["texts"] = texts
+        return BatchFeature(data=data)