"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""
import re
from minigpt4.common.registry import registry
from minigpt4.processors.base_processor import BaseProcessor
from minigpt4.processors.randaugment import RandomAugment
from omegaconf import OmegaConf
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
from transformers import CLIPImageProcessor
@registry.register_processor("clip_image_train")
class ClipImageTrainProcessor(BaseProcessor):
    """Training-time image processor that delegates to a pretrained
    HuggingFace ``CLIPImageProcessor`` (patch-14, 224px checkpoint by default).
    """

    def __init__(self, proc_type, do_normalize=True):
        """
        Args:
            proc_type: model id or local path passed to
                ``CLIPImageProcessor.from_pretrained``.
            do_normalize: whether pixel values are normalized by the
                underlying CLIP processor; coerced to ``bool``.
        """
        super().__init__()
        self.transform = CLIPImageProcessor.from_pretrained(proc_type)
        # bool(...) replaces the redundant `True if do_normalize else False`.
        self.transform.do_normalize = bool(do_normalize)

    def __call__(self, item):
        """Preprocess a single image and return the first `pixel_values`
        tensor (batch dimension stripped)."""
        return self.transform.preprocess(item, return_tensors='pt')['pixel_values'][0]

    @classmethod
    def from_config(cls, cfg=None):
        """Build from an OmegaConf node; missing keys fall back to defaults."""
        if cfg is None:
            cfg = OmegaConf.create()
        proc_type = cfg.get("proc_type", 'openai/clip-vit-large-patch14')
        do_normalize = cfg.get("do_normalize", True)
        return cls(proc_type=proc_type, do_normalize=do_normalize)
@registry.register_processor("clip_image_eval")
class ClipImageEvalProcessor(BaseProcessor):
    """Evaluation-time image processor that delegates to a pretrained
    HuggingFace ``CLIPImageProcessor`` (patch-14, 224px checkpoint by default).
    """

    def __init__(self, proc_type, do_normalize=True):
        """
        Args:
            proc_type: model id or local path passed to
                ``CLIPImageProcessor.from_pretrained``.
            do_normalize: whether pixel values are normalized by the
                underlying CLIP processor; coerced to ``bool``.
        """
        super().__init__()
        self.transform = CLIPImageProcessor.from_pretrained(proc_type)
        # bool(...) replaces the redundant `True if do_normalize else False`.
        self.transform.do_normalize = bool(do_normalize)

    def __call__(self, item):
        """Preprocess a single image and return the first `pixel_values`
        tensor (batch dimension stripped)."""
        return self.transform.preprocess(item, return_tensors='pt')['pixel_values'][0]

    @classmethod
    def from_config(cls, cfg=None):
        """Build from an OmegaConf node; missing keys fall back to defaults."""
        if cfg is None:
            cfg = OmegaConf.create()
        proc_type = cfg.get("proc_type", 'openai/clip-vit-large-patch14')
        do_normalize = cfg.get("do_normalize", True)
        return cls(proc_type=proc_type, do_normalize=do_normalize)
@registry.register_processor("clip_image_train_336")
class ClipImageTrainProcessor336(BaseProcessor):
    """Training-time image processor for the 336px CLIP checkpoint.

    NOTE(review): renamed from the duplicate ``ClipImageTrainProcessor``,
    which shadowed the 224px class of the same name at module level; lookup
    via the registry key "clip_image_train_336" is unaffected.
    """

    def __init__(self, proc_type, do_normalize=True):
        """
        Args:
            proc_type: model id or local path passed to
                ``CLIPImageProcessor.from_pretrained``.
            do_normalize: whether pixel values are normalized by the
                underlying CLIP processor; coerced to ``bool``.
        """
        super().__init__()
        self.transform = CLIPImageProcessor.from_pretrained(proc_type)
        # bool(...) replaces the redundant `True if do_normalize else False`.
        self.transform.do_normalize = bool(do_normalize)

    def __call__(self, item):
        """Preprocess a single image and return the first `pixel_values`
        tensor (batch dimension stripped)."""
        return self.transform.preprocess(item, return_tensors='pt')['pixel_values'][0]

    @classmethod
    def from_config(cls, cfg=None):
        """Build from an OmegaConf node; missing keys fall back to defaults."""
        if cfg is None:
            cfg = OmegaConf.create()
        proc_type = cfg.get("proc_type", 'openai/clip-vit-large-patch14-336')
        do_normalize = cfg.get("do_normalize", True)
        return cls(proc_type=proc_type, do_normalize=do_normalize)
@registry.register_processor("clip_image_eval_336")
class ClipImageEvalProcessor336(BaseProcessor):
    """Evaluation-time image processor for the 336px CLIP checkpoint.

    NOTE(review): renamed from the duplicate ``ClipImageEvalProcessor``,
    which shadowed the 224px class of the same name at module level; lookup
    via the registry key "clip_image_eval_336" is unaffected.
    """

    def __init__(self, proc_type, do_normalize=True):
        """
        Args:
            proc_type: model id or local path passed to
                ``CLIPImageProcessor.from_pretrained``.
            do_normalize: whether pixel values are normalized by the
                underlying CLIP processor; coerced to ``bool``.
        """
        super().__init__()
        self.transform = CLIPImageProcessor.from_pretrained(proc_type)
        # bool(...) replaces the redundant `True if do_normalize else False`.
        self.transform.do_normalize = bool(do_normalize)

    def __call__(self, item):
        """Preprocess a single image and return the first `pixel_values`
        tensor (batch dimension stripped)."""
        return self.transform.preprocess(item, return_tensors='pt')['pixel_values'][0]

    @classmethod
    def from_config(cls, cfg=None):
        """Build from an OmegaConf node; missing keys fall back to defaults."""
        if cfg is None:
            cfg = OmegaConf.create()
        proc_type = cfg.get("proc_type", 'openai/clip-vit-large-patch14-336')
        do_normalize = cfg.get("do_normalize", True)
        return cls(proc_type=proc_type, do_normalize=do_normalize)