Spaces:
Running
Running
import importlib | |
import os | |
import random | |
import cv2 | |
import numpy as np | |
import torch | |
import torch.nn.functional as F | |
from transformers import PretrainedConfig | |
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    The stringified seed is also stored in the ``PL_GLOBAL_SEED`` environment
    variable.

    Args:
        seed: Seed value passed unchanged to every generator.
    """
    os.environ["PL_GLOBAL_SEED"] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
def is_torch2_available():
    """Return True when ``torch.nn.functional`` has ``scaled_dot_product_attention``."""
    attention_op = "scaled_dot_product_attention"
    return hasattr(F, attention_op)
def instantiate_from_config(config):
    """Build the object described by a ``target``/``params`` config.

    Args:
        config: Mapping with a ``target`` key holding a dotted import path and
            an optional ``params`` mapping of keyword arguments. The sentinel
            strings ``'__is_first_stage__'`` and ``"__is_unconditional__"``
            are accepted as well.

    Returns:
        The result of calling the resolved target with ``params`` as keyword
        arguments, or ``None`` for either sentinel string.

    Raises:
        KeyError: If ``config`` has no ``target`` entry and is not a sentinel.
    """
    if "target" not in config:
        if config == '__is_first_stage__' or config == "__is_unconditional__":
            return None
        raise KeyError("Expected key `target` to instantiate.")
    target = get_obj_from_str(config["target"])
    params = config.get("params")
    # A `params` key that is present but empty arrives as None; treat it
    # like a missing key instead of failing on `**None`.
    if params is None:
        params = {}
    return target(**params)
def get_obj_from_str(string, reload=False):
    """Resolve a dotted path such as ``"package.module.Name"`` to an object.

    Args:
        string: Dotted path; everything before the last dot is imported as a
            module and the final component is looked up on it.
        reload: When true, the module is reloaded before the lookup.

    Returns:
        The attribute named by the last path component.
    """
    module_path, attr_name = string.rsplit(".", 1)
    module_obj = importlib.import_module(module_path)
    if reload:
        module_obj = importlib.reload(module_obj)
    return getattr(module_obj, attr_name)
def drop_seq_token(seq, drop_rate=0.5):
    """Randomly keep a subset of positions along dimension 1 of ``seq``.

    A random permutation of the positions is drawn and its leading
    ``floor(length * (1 - drop_rate))`` entries are kept, so the surviving
    tokens come back in shuffled order. The same positions are used for every
    index of dimension 0.

    Args:
        seq: Tensor with at least two dimensions; tokens are dropped along
            dimension 1.
        drop_rate: Fraction of positions to discard.

    Returns:
        ``seq`` indexed down to the kept positions along dimension 1.
    """
    num_tokens = seq.size(1)
    # Round away binary floating-point noise before truncating: 10 * (1 - 0.8)
    # evaluates to 1.9999999999999996, which a bare int() would turn into 1.
    num_keep_tokens = int(round(num_tokens * (1 - drop_rate), 9))
    # A drop_rate above 1 must not become a negative slice bound, which would
    # silently keep tokens instead of dropping all of them.
    num_keep_tokens = max(num_keep_tokens, 0)
    keep_idx = torch.randperm(num_tokens)[:num_keep_tokens]
    return seq[:, keep_idx]
def import_model_class_from_model_name_or_path(
    pretrained_model_name_or_path: str, revision: str, subfolder: str = "text_encoder"
):
    """Return the text-encoder class named by a pretrained model's config.

    The config is loaded with ``PretrainedConfig.from_pretrained`` and the
    first entry of its ``architectures`` list selects the class.

    Args:
        pretrained_model_name_or_path: Forwarded to ``from_pretrained``.
        revision: Forwarded to ``from_pretrained``.
        subfolder: Forwarded to ``from_pretrained``.

    Returns:
        ``CLIPTextModel`` or ``CLIPTextModelWithProjection`` (the class itself,
        not an instance).

    Raises:
        ValueError: If the first listed architecture is neither of the two.
    """
    encoder_config = PretrainedConfig.from_pretrained(
        pretrained_model_name_or_path, subfolder=subfolder, revision=revision
    )
    architecture = encoder_config.architectures[0]

    # The model classes are imported only once the matching branch is taken.
    if architecture == "CLIPTextModel":
        from transformers import CLIPTextModel

        return CLIPTextModel
    if architecture == "CLIPTextModelWithProjection":
        from transformers import CLIPTextModelWithProjection

        return CLIPTextModelWithProjection
    raise ValueError(f"{architecture} is not supported.")
def resize_numpy_image_long(image, resize_long_edge=768):
    """Shrink an image so its longer side is at most ``resize_long_edge``.

    Images whose longer side already fits are returned unchanged (the same
    object). Larger ones are downscaled with Lanczos interpolation, keeping
    the aspect ratio up to integer truncation.

    Args:
        image: Array whose first two dimensions are height and width.
        resize_long_edge: Maximum allowed length of the longer side, in pixels.

    Returns:
        The original ``image``, or the downscaled array from ``cv2.resize``.
    """
    h, w = image.shape[:2]
    long_edge = max(h, w)
    if long_edge <= resize_long_edge:
        return image
    k = resize_long_edge / long_edge
    # Clamp to one pixel: for extreme aspect ratios the short side would
    # otherwise truncate to 0, which cv2.resize rejects.
    new_h = max(1, int(h * k))
    new_w = max(1, int(w * k))
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4)