# GPT-K / knowledge / transforms.py
# cwkuo
# implement gpt-k demo
# 7962ed0
# raw
# history blame
# 1.33 kB
import itertools
from torchvision.transforms import functional as F
import re
def five_crop(image, ratio=0.6):
    """Crop the four corners and the center of ``image``.

    Args:
        image: Image whose ``.size`` unpacks as ``(width, height)``
            (presumably a PIL image -- confirm against callers).
        ratio: Side length of every crop, as a fraction of the matching
            image side.

    Returns:
        The result of ``F.five_crop`` for the computed crop size (per the
        torchvision docs: top-left, top-right, bottom-left, bottom-right
        and center crops).
    """
    width, height = image.size
    # torchvision documents ``size`` as integer (height, width). Float sizes
    # only work by accident on PIL input (each box coordinate is rounded
    # independently) and break on tensors, so truncate to int here, the
    # same way nine_crop computes its pixel offsets.
    crop_size = (int(height * ratio), int(width * ratio))
    return F.five_crop(image, crop_size)
def nine_crop(image, ratio=0.4):
    """Cut ``image`` into a 3x3 grid of crops.

    Along each axis three spans are used: one anchored at the start, one
    centered, and one anchored at the end. Each span covers roughly
    ``ratio`` of that axis (offsets are truncated to whole pixels).

    Args:
        image: Image whose ``.size`` unpacks as ``(width, height)``.
        ratio: Fraction of each image side covered by a single crop.

    Returns:
        List of nine crops produced by ``F.crop``, ordered row by row
        (top row first, left to right within a row).
    """
    img_w, img_h = image.size

    def _spans(extent):
        # (start, stop) pixel pairs for the leading, centered and trailing
        # windows along one axis of length ``extent``.
        starts = (0, int((0.5 - ratio / 2) * extent), int((1.0 - ratio) * extent))
        stops = (int(ratio * extent), int((0.5 + ratio / 2) * extent), extent)
        return list(zip(starts, stops))

    row_spans = _spans(img_h)
    col_spans = _spans(img_w)

    return [
        F.crop(image, top, left, bottom - top, right - left)
        for (top, bottom), (left, right) in itertools.product(row_spans, col_spans)
    ]
def pre_caption(caption, max_words=None):
    """Normalize a caption string and optionally truncate it.

    The caption is lower-cased, the punctuation characters
    ``. ! " ( ) * # : ; ~`` are replaced by spaces, runs of two or more
    whitespace characters are collapsed to a single space, and trailing
    newlines plus surrounding spaces are removed.

    Ref: https://github.com/salesforce/LAVIS/blob/main/lavis/processors/blip_processors.py#L49-L68

    Args:
        caption: Raw caption text.
        max_words: If not ``None``, keep at most this many space-separated
            words.

    Returns:
        The cleaned (and possibly truncated) caption.
    """
    lowered = caption.lower()
    without_punct = re.sub(r"([.!\"()*#:;~])", " ", lowered)
    collapsed = re.sub(r"\s{2,}", " ", without_punct)
    cleaned = collapsed.rstrip("\n").strip(" ")

    # No limit requested: nothing to truncate.
    if max_words is None:
        return cleaned

    words = cleaned.split(" ")
    if len(words) <= max_words:
        return cleaned
    return " ".join(words[:max_words])