# sinder / visualize.py
# haoqiwang's picture
# add files
# 9ae1b1e
#!/usr/bin/env python
import argparse
import os
import sys
from pathlib import Path
import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm
from sinder import (
get_tokens,
load_model,
load_visual_data,
pca_array,
)
# Set XFORMERS_DISABLED=1 in this process's environment.
# NOTE(review): this statement runs after `from sinder import ...`; if the
# flag is read at import time it would have to be set before that import
# to take effect -- confirm against the sinder / model code.
os.environ['XFORMERS_DISABLED'] = '1'
def parse_args(argv=None):
    """Parse the command-line options of the visualization script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing ``parse_args()`` callers behave exactly as before.

    Returns:
        argparse.Namespace with the attributes ``imgs`` (list of str),
        ``model``, ``workdir``, ``checkpoint`` and ``visual_size``.
    """
    parser = argparse.ArgumentParser(description='Visualize')
    parser.add_argument(
        'imgs', nargs='+', type=str, help='path to image/images'
    )
    parser.add_argument(
        '--model', type=str, default='dinov2_vitg14', help='model name'
    )
    parser.add_argument('--workdir', type=str, default='visualize')
    parser.add_argument(
        '--checkpoint',
        type=str,
        default=None,
        help='Path to checkpoint. Default is None, which loads the official pretrained weights',
    )
    parser.add_argument(
        '--visual_size',
        type=int,
        default=518,
        help='short side size of input image',
    )
    return parser.parse_args(argv)
def visualize(args, model, visual_dataset):
    """Write a token-norm heat map and a PCA image for every dataset item.

    For each index of ``visual_dataset`` the last token tensor returned by
    ``get_tokens`` is rendered into ``args.folder`` as two files named
    after the stem of the corresponding entry in ``visual_dataset.files``:

    * ``<stem>_norm.png`` -- norm of each token along the last axis,
      scaled by the largest norm to 0..255 and colored with OpenCV's JET
      colormap.
    * ``<stem>_pca.png`` -- the image returned by ``pca_array``.

    Args:
        args: Namespace providing ``folder``, the output directory as a
            ``pathlib.Path``.
        model: Model handed to ``get_tokens``; put into eval mode here.
        visual_dataset: Object supporting ``len()`` and integer indexing
            that also exposes ``files``, indexed the same way.

    Raises:
        OSError: If OpenCV reports that the norm image was not written.
    """
    model.eval()
    for d in tqdm(range(len(visual_dataset))):
        visual_image = visual_dataset[d]
        visual_tokens_all = get_tokens(model, visual_image)
        # Each element unpacks into a (tokens, cls) pair; only the tokens
        # are needed for the visualizations.
        visual_tokens, _ = zip(*visual_tokens_all)
        filename = Path(visual_dataset.files[d]).stem

        last_tokens = visual_tokens[-1]
        t = last_tokens.detach().cpu()
        h, w, _ = t.shape

        # Compute the per-token norm once and reuse it for the scaling.
        token_norm = t.norm(dim=-1)
        norm = ((token_norm / token_norm.max()) * 255).byte().numpy()
        # Upscale by 14 per token (presumably the ViT patch size -- TODO
        # confirm); resample filter 0 is PIL's nearest-neighbour.
        norm_img = Image.fromarray(norm).resize((w * 14, h * 14), 0)
        norm = cv2.applyColorMap(np.array(norm_img), cv2.COLORMAP_JET)

        # cv2.imwrite expects a string filename (not every OpenCV build
        # accepts pathlib.Path) and signals failure via its return value
        # rather than an exception.
        norm_path = str(args.folder / f'{filename}_norm.png')
        if not cv2.imwrite(norm_path, norm):
            raise OSError(f'cv2.imwrite failed to write {norm_path}')

        pca_img = pca_array(last_tokens)
        pca_img.save(args.folder / f'{filename}_pca.png')
def main():
    """Script entry point: parse options, create the output folder, run.

    The user-expanded ``--workdir`` path is stored on the namespace as
    ``args.folder`` before the namespace and the raw command line are
    echoed to stdout.
    """
    args = parse_args()

    out_dir = Path(args.workdir).expanduser()
    args.folder = out_dir
    os.makedirs(out_dir, exist_ok=True)

    # Echo the effective configuration and the invoking command line.
    print(args)
    print(' '.join(sys.argv))

    net = load_model(args.model, args.checkpoint)
    dataset = load_visual_data(args, net)
    visualize(args, net, dataset)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()