import cv2
import numpy as np
import torch
from PIL import Image
from transformers import AutoProcessor, CLIPModel

from annotator.util import annotator_ckpts_path


class ContentDetector:
    """Extract a global CLIP image embedding describing an image's content.

    Wraps a pretrained CLIP vision tower (via HuggingFace ``transformers``);
    calling the instance on a BGR image (OpenCV convention) returns the
    model's pooled image-feature vector as a numpy array.
    """

    def __init__(self, model_name="openai/clip-vit-large-patch14"):
        """Load the CLIP model and its processor.

        Args:
            model_name: HuggingFace model identifier; weights are cached
                under ``annotator_ckpts_path``.
        """
        # Fall back to CPU when CUDA is unavailable — the original
        # hard-coded .cuda()/.to('cuda') and crashed on CPU-only hosts.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = (
            CLIPModel.from_pretrained(model_name, cache_dir=annotator_ckpts_path)
            .to(self.device)
            .eval()
        )
        self.processor = AutoProcessor.from_pretrained(
            model_name, cache_dir=annotator_ckpts_path
        )

    def __call__(self, img):
        """Return the CLIP image-feature vector for one image.

        Args:
            img: HxWx3 uint8 array in BGR channel order (OpenCV convention).

        Returns:
            1-D ``np.ndarray`` of pooled CLIP image features
            (e.g. 768 dims for clip-vit-large-patch14).
        """
        with torch.no_grad():
            # CLIP's processor expects RGB; OpenCV images arrive as BGR.
            pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            inputs = self.processor(images=[pil_img], return_tensors="pt").to(self.device)
            image_features = self.model.get_image_features(**inputs)
            # Single-image batch: take row 0, move to host memory as numpy.
            content_emb = image_features[0].detach().cpu().numpy()
        return content_emb