import io
import base64
import shutil

import torch
from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer


class PreTrainedPipeline():
    """Text-embedding pipeline backed by a CLIP checkpoint.

    Loads a CLIP model and its tokenizer once at construction time, then
    maps an input string to the model's text feature vector on each call.
    """

    def __init__(self, path=""):
        """Load the CLIP model and tokenizer.

        Args:
            path (:obj:`str`): location handed to ``from_pretrained`` for
                both the model and the tokenizer.
        """
        self.model = CLIPModel.from_pretrained(path)
        self.tokenizer = CLIPTokenizer.from_pretrained(path)

    def __call__(self, inputs: str) -> list:
        """Compute the CLIP text embedding of ``inputs``.

        Args:
            inputs (:obj:`str`): a string containing some text.

        Return:
            A :obj:`list` of floats: the text features computed by the
            model for ``inputs``.
        """
        # truncation=True caps the sequence at the tokenizer's maximum
        # length. CLIP's text encoder has a fixed number of position
        # embeddings, so an over-long, untruncated prompt would raise inside
        # the model instead of producing an embedding.
        encoded = self.tokenizer(
            inputs,
            padding=True,
            truncation=True,
            return_tensors="pt",
        )

        # Inference only: skip gradient tracking.
        with torch.no_grad():
            text_features = self.model.get_text_features(**encoded)

        # Only the first row of the returned batch is exposed to the caller.
        return text_features[0].tolist()