Spaces:
Build error
Build error
import argparse
import json

import numpy as np
import torch
from transformers import AutoTokenizer, CLIPTextModelWithProjection
if __name__ == "__main__":
    # Script entry point: encode a list of captions with a CLIP text model
    # and save the L2-normalized text embeddings to a .npy file.
    #
    # Command-line options:
    #   --model   path or name passed to `from_pretrained` for both the
    #             tokenizer and the text model.
    #   --text    JSON file; the first element of every top-level entry is
    #             used as the caption to encode.
    #   --out     destination passed to `np.save`.
    #   --device  torch device string; when omitted, 'cuda:0' is used if CUDA
    #             is available and 'cpu' otherwise.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        type=str,
        default='./pretrained_models/clip-vit-base-patch32-projection')
    parser.add_argument('--text',
                        type=str,
                        default='data/captions/coco_class_captions.json')
    parser.add_argument('--out', type=str, default='output.npy')
    parser.add_argument('--device', type=str, default=None)
    args = parser.parse_args()

    # Fall back to CPU instead of crashing on machines without a GPU.
    device = args.device
    if device is None:
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = CLIPTextModelWithProjection.from_pretrained(args.model)
    model.to(device)

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(args.text, encoding='utf-8') as f:
        data = json.load(f)

    # Only the first element of each entry is encoded.
    # NOTE(review): presumably each entry is a list of captions for one
    # class -- confirm against the caption file format.
    captions = [entry[0] for entry in data]
    if not captions:
        raise ValueError(f'no captions found in {args.text!r}')

    # padding=True pads every caption to the longest one so they can be
    # stacked into a single batch tensor.
    tokens = tokenizer(text=captions, return_tensors='pt', padding=True)
    tokens = tokens.to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        text_outputs = model(**tokens)

    txt_feats = text_outputs.text_embeds
    # Scale every embedding to unit L2 norm along the feature dimension.
    txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
    # Flatten any leading dimensions so the saved array is 2-D: (rows, dim).
    txt_feats = txt_feats.reshape(-1, txt_feats.shape[-1])
    np.save(args.out, txt_feats.detach().cpu().numpy())