"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""
|
|
|
|
import re
|
|
|
|
from minigpt4.common.registry import registry
|
|
from minigpt4.processors.base_processor import BaseProcessor
|
|
from minigpt4.processors.randaugment import RandomAugment
|
|
from omegaconf import OmegaConf
|
|
from torchvision import transforms
|
|
from torchvision.transforms.functional import InterpolationMode
|
|
from transformers import CLIPImageProcessor
|
|
|
|
|
|
|
|
@registry.register_processor("clip_image_train")
class ClipImageTrainProcessor(BaseProcessor):
    """Train-time image processor wrapping HuggingFace's ``CLIPImageProcessor``.

    Loads the preprocessing configuration (resize, crop, normalization stats)
    from a pretrained CLIP checkpoint and applies it to single images.
    """

    def __init__(self, proc_type, do_normalize=True):
        """
        Args:
            proc_type: HuggingFace model id or local path whose image-processor
                config to load (e.g. ``openai/clip-vit-large-patch14``).
            do_normalize: whether the processor should apply mean/std
                normalization during preprocessing.
        """
        super().__init__()
        self.transform = CLIPImageProcessor.from_pretrained(proc_type)
        # Coerce to a plain bool; the HF processor reads this flag in preprocess().
        self.transform.do_normalize = bool(do_normalize)

    def __call__(self, item):
        # preprocess() returns a batched dict; unwrap the single image tensor.
        return self.transform.preprocess(item, return_tensors='pt')['pixel_values'][0]

    @classmethod
    def from_config(cls, cfg=None):
        """Build the processor from an OmegaConf config (defaults when ``cfg`` is None)."""
        if cfg is None:
            cfg = OmegaConf.create()

        proc_type = cfg.get("proc_type", 'openai/clip-vit-large-patch14')
        do_normalize = cfg.get("do_normalize", True)

        return cls(proc_type=proc_type, do_normalize=do_normalize)
|
|
|
|
|
|
@registry.register_processor("clip_image_eval")
class ClipImageEvalProcessor(BaseProcessor):
    """Eval-time image processor wrapping HuggingFace's ``CLIPImageProcessor``.

    Identical preprocessing to the train variant (CLIP image preprocessing is
    deterministic); registered under a separate key for config symmetry.
    """

    def __init__(self, proc_type, do_normalize=True):
        """
        Args:
            proc_type: HuggingFace model id or local path whose image-processor
                config to load (e.g. ``openai/clip-vit-large-patch14``).
            do_normalize: whether the processor should apply mean/std
                normalization during preprocessing.
        """
        super().__init__()
        self.transform = CLIPImageProcessor.from_pretrained(proc_type)
        # Coerce to a plain bool; the HF processor reads this flag in preprocess().
        self.transform.do_normalize = bool(do_normalize)

    def __call__(self, item):
        # preprocess() returns a batched dict; unwrap the single image tensor.
        return self.transform.preprocess(item, return_tensors='pt')['pixel_values'][0]

    @classmethod
    def from_config(cls, cfg=None):
        """Build the processor from an OmegaConf config (defaults when ``cfg`` is None)."""
        if cfg is None:
            cfg = OmegaConf.create()

        proc_type = cfg.get("proc_type", 'openai/clip-vit-large-patch14')
        do_normalize = cfg.get("do_normalize", True)

        return cls(proc_type=proc_type, do_normalize=do_normalize)
|
|
|
|
@registry.register_processor("clip_image_train_336")
# NOTE(review): this class reuses the name ClipImageTrainProcessor defined
# earlier in this module, shadowing it at module level. The registry captures
# each class at decoration time so both registry keys still resolve correctly,
# but a direct `import ClipImageTrainProcessor` yields this 336 variant.
# Consider renaming (e.g. ClipImageTrain336Processor) after auditing importers.
class ClipImageTrainProcessor(BaseProcessor):
    """Train-time CLIP image processor for the 336px ViT-L/14 checkpoint."""

    def __init__(self, proc_type, do_normalize=True):
        """
        Args:
            proc_type: HuggingFace model id or local path whose image-processor
                config to load (e.g. ``openai/clip-vit-large-patch14-336``).
            do_normalize: whether the processor should apply mean/std
                normalization during preprocessing.
        """
        super().__init__()
        self.transform = CLIPImageProcessor.from_pretrained(proc_type)
        # Coerce to a plain bool; the HF processor reads this flag in preprocess().
        self.transform.do_normalize = bool(do_normalize)

    def __call__(self, item):
        # preprocess() returns a batched dict; unwrap the single image tensor.
        return self.transform.preprocess(item, return_tensors='pt')['pixel_values'][0]

    @classmethod
    def from_config(cls, cfg=None):
        """Build the processor from an OmegaConf config (defaults when ``cfg`` is None)."""
        if cfg is None:
            cfg = OmegaConf.create()

        proc_type = cfg.get("proc_type", 'openai/clip-vit-large-patch14-336')
        do_normalize = cfg.get("do_normalize", True)

        return cls(proc_type=proc_type, do_normalize=do_normalize)
|
|
|
|
|
|
@registry.register_processor("clip_image_eval_336")
# NOTE(review): this class reuses the name ClipImageEvalProcessor defined
# earlier in this module, shadowing it at module level. The registry captures
# each class at decoration time so both registry keys still resolve correctly,
# but a direct `import ClipImageEvalProcessor` yields this 336 variant.
# Consider renaming (e.g. ClipImageEval336Processor) after auditing importers.
class ClipImageEvalProcessor(BaseProcessor):
    """Eval-time CLIP image processor for the 336px ViT-L/14 checkpoint."""

    def __init__(self, proc_type, do_normalize=True):
        """
        Args:
            proc_type: HuggingFace model id or local path whose image-processor
                config to load (e.g. ``openai/clip-vit-large-patch14-336``).
            do_normalize: whether the processor should apply mean/std
                normalization during preprocessing.
        """
        super().__init__()
        self.transform = CLIPImageProcessor.from_pretrained(proc_type)
        # Coerce to a plain bool; the HF processor reads this flag in preprocess().
        self.transform.do_normalize = bool(do_normalize)

    def __call__(self, item):
        # preprocess() returns a batched dict; unwrap the single image tensor.
        return self.transform.preprocess(item, return_tensors='pt')['pixel_values'][0]

    @classmethod
    def from_config(cls, cfg=None):
        """Build the processor from an OmegaConf config (defaults when ``cfg`` is None)."""
        if cfg is None:
            cfg = OmegaConf.create()

        proc_type = cfg.get("proc_type", 'openai/clip-vit-large-patch14-336')
        do_normalize = cfg.get("do_normalize", True)

        return cls(proc_type=proc_type, do_normalize=do_normalize)