Upload processor
Browse files- preprocessor_config.json +19 -0
- processor.py +68 -0
preprocessor_config.json
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{
  "auto_map": {
    "AutoImageProcessor": "processor.CondViTProcessor",
    "AutoProcessor": "processor.CondViTProcessor"
  },
  "bkg_color": 255,
  "image_mean": [
    0.48145466,
    0.4578275,
    0.40821073
  ],
  "image_processor_type": "CondViTProcessor",
  "image_std": [
    0.26862954,
    0.26130258,
    0.27577711
  ],
  "input_resolution": 224
}
processor.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers.image_processing_utils import ImageProcessingMixin, BatchFeature
|
2 |
+
|
3 |
+
from torchvision.transforms import transforms as tf
|
4 |
+
import torchvision.transforms.functional as F
|
5 |
+
from PIL import Image
|
6 |
+
import torch
|
7 |
+
|
8 |
+
|
9 |
+
class CondViTProcessor(ImageProcessingMixin):
    """Image preprocessor for CondViT: pad to square, resize, normalize.

    Text inputs are carried through unchanged so the processor can be used
    as a combined image/text `AutoProcessor`.
    """

    def __init__(
        self,
        bkg_color=255,
        input_resolution=224,
        image_mean=(0.48145466, 0.4578275, 0.40821073),
        image_std=(0.26862954, 0.26130258, 0.27577711),
        **kwargs,
    ):
        super().__init__(**kwargs)

        self.bkg_color = bkg_color  # fill value used when padding to a square
        self.input_resolution = input_resolution  # target side length after resize
        self.image_mean = image_mean  # per-channel normalization mean
        self.image_std = image_std  # per-channel normalization std

    def square_pad(self, image):
        """Pad a PIL image with `bkg_color` so it becomes max(w, h)-sided square.

        The padding is split as evenly as possible between the two sides of
        each dimension (extra pixel goes to the right/bottom).
        """
        max_wh = max(image.size)
        p_left, p_top = [(max_wh - s) // 2 for s in image.size]
        p_right, p_bottom = [
            max_wh - (s + pad) for s, pad in zip(image.size, [p_left, p_top])
        ]
        padding = (p_left, p_top, p_right, p_bottom)
        return F.pad(image, padding, self.bkg_color, "constant")

    def process_img(self, image):
        """Pad, resize to `input_resolution`, convert to tensor and normalize.

        Returns a (C, H, W) float tensor.
        """
        img = self.square_pad(image)
        # The image is square after padding, so resizing the shorter edge
        # yields an input_resolution x input_resolution image.
        img = F.resize(img, self.input_resolution)
        img = F.to_tensor(img)
        img = F.normalize(img, self.image_mean, self.image_std)
        return img

    def __call__(self, images, texts):
        """
        Parameters
        ----------
        images : Union[Image.Image, List[Image.Image]]
            Image or list of images to process
        texts : Union[str, List[str]]
            Text or list of texts to process. Pass through, no operation is performed.

        Returns
        -------
        BatchFeature
            pixel_values : torch.Tensor
                Processed image tensor: (C, H, W) for a single image,
                (B, C, H, W) for a list of images.
            texts : Union[str, List[str]]
        """
        # Single image: must return here. The previous code rebound `images`
        # to the BatchFeature and fell through to the batch path below, which
        # then iterated the BatchFeature's string keys and crashed inside
        # process_img.
        if isinstance(images, Image.Image):
            return BatchFeature(
                data={"pixel_values": self.process_img(images), "texts": texts}
            )

        return BatchFeature(
            data={
                "pixel_values": torch.stack([self.process_img(img) for img in images]),
                "texts": texts,
            }
        )
|