File size: 1,979 Bytes
f7fe9cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5727028
f7fe9cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5db638
 
f7fe9cd
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import numpy as np
import torch
from PIL import Image
from transformers.image_processing_utils import BaseImageProcessor
from transformers.utils import logging

# Module-level logger obtained from transformers' logging utilities, named after this module.
logger = logging.get_logger(__name__)


class VQModelImageProcessor(BaseImageProcessor):  # type: ignore
    """Convert PIL images to channel-first tensors in ``[-1, 1]`` and back.

    ``preprocess`` resizes an image to a ``size`` x ``size`` square and turns it
    into a ``float32`` tensor; ``postprocess`` performs the inverse mapping and
    resizes the result back to a requested resolution.
    """

    def __init__(
        self,
        size: int = 256,
        convert_rgb: bool = False,
        resample: Image.Resampling = Image.Resampling.LANCZOS,
        **kwargs: object,
    ) -> None:
        """Store the processing options.

        Args:
            size: Edge length, in pixels, of the square image produced by
                ``preprocess``; also the fallback output size of ``postprocess``.
            convert_rgb: When true, composite the image onto a white background
                and emit 3 (RGB) channels instead of 4 (RGBA).
            resample: Pillow resampling filter used for every resize.
            **kwargs: Extra options forwarded unchanged to the base class
                initialiser.
        """
        # Let the base class run its own initialisation and consume the extra
        # keyword arguments instead of silently discarding them.
        super().__init__(**kwargs)
        self.size = size
        self.convert_rgb = convert_rgb
        self.resample = resample

    def __call__(self, image: Image.Image) -> dict:
        """Shorthand for :meth:`preprocess`."""
        return self.preprocess(image)

    def preprocess(self, image: Image.Image) -> dict:
        """Resize ``image`` to a square and convert it to a normalised tensor.

        Args:
            image: Input PIL image in any mode that can be converted to RGBA.

        Returns:
            A dict with three entries: ``"image"`` is a ``float32`` tensor of
            shape ``(C, size, size)`` scaled to ``[-1, 1]`` (``C`` is 3 when
            ``convert_rgb`` is set, otherwise 4); ``"width"`` and ``"height"``
            are the dimensions of the input image before resizing.
        """
        orig_width, orig_height = image.size
        target = (self.size, self.size)

        # Convert before resizing: Pillow always falls back to NEAREST for
        # mode "1" and "P" images, which would silently ignore `self.resample`
        # for palette (e.g. GIF / indexed PNG) and bilevel inputs.
        image = image.convert("RGBA").resize(target, resample=self.resample)

        if self.convert_rgb:
            # Paste RGBA image on white background, using its alpha as mask
            background = Image.new("RGB", image.size, (255, 255, 255))
            background.paste(image, mask=image.split()[3])
            image = background

        return {
            "image": self.to_tensor(image),
            "width": orig_width,
            "height": orig_height,
        }

    def to_tensor(self, image: Image.Image) -> torch.Tensor:
        """Map a multi-channel PIL image to a ``(C, H, W)`` ``float32`` tensor.

        Pixel values are rescaled linearly so that 0 maps to -1 and 255 maps
        to 1. The image must have a channel axis (e.g. RGB or RGBA).
        """
        x = np.array(image) / 127.5 - 1.0
        # PIL arrays are (H, W, C); move channels first.
        x = x.transpose(2, 0, 1).astype(np.float32)
        return torch.as_tensor(x)

    def postprocess(
        self,
        x: torch.Tensor,
        width: int | None = None,
        height: int | None = None,
    ) -> Image.Image:
        """Convert a normalised tensor back to a PIL image.

        Args:
            x: Channel-first tensor ``(C, H, W)``; values are expected in
                ``[-1, 1]`` and anything outside that range is clipped.
            width: Output width in pixels; falls back to ``self.size`` when
                omitted (or 0).
            height: Output height in pixels; falls back to ``self.size`` when
                omitted (or 0).

        Returns:
            The decoded image, resized to ``(width, height)`` with the
            configured resampling filter.
        """
        # Cast to float32 first: NumPy has no bfloat16, so `.numpy()` would
        # otherwise fail on bfloat16 tensors.
        x_np = x.detach().cpu().float().numpy()
        x_np = x_np.transpose(1, 2, 0)
        x_np = np.clip((x_np + 1.0) * 127.5, 0, 255)
        # Round to nearest instead of truncating so that a
        # preprocess -> postprocess round trip does not lose an intensity level.
        image = Image.fromarray(np.rint(x_np).astype(np.uint8))

        # Resize image
        width = width or self.size
        height = height or self.size
        image = image.resize((width, height), resample=self.resample)

        return image