import json
import os
from io import BytesIO

import numpy as np
import requests
import torch
from PIL import Image

from clip_retrieval.load_clip import load_clip, get_tokenizer


class ClipAppClient:
    """
    Client for generating embeddings with an OpenAI CLIP model served over HTTP.

    Images can be preprocessed locally; embedding requests are posted to the
    server address in the HTTP_ADDRESS environment variable
    (default: http://127.0.0.1:8000/).

    Example:
        app_client = ClipAppClient()
        test_image_url = "https://example.com/image.jpg"
        preprocessed_image = app_client.preprocess_image(test_image_url)
        text = "A beautiful landscape"
        text_embeddings = app_client.text_to_embedding(text)
        image_embeddings = app_client.image_url_to_embedding(test_image_url)
        preprocessed_image_embeddings = app_client.preprocessed_image_to_embedding(preprocessed_image)
    """

    def __init__(self, clip_model="ViT-L/14", device=None):
        # An open_clip model such as "open_clip:ViT-H-14" also works here.
        self.clip_model = clip_model
        self.device = device or ("cuda:0" if torch.cuda.is_available() else "cpu")
        print("using device", self.device)
        _, self.preprocess = load_clip(clip_model, use_jit=True, device=self.device)
        self.tokenizer = get_tokenizer(clip_model)

    def _request_embedding(self, payload):
        """
        POST a multipart payload to the embedding server and decode the response.

        :param payload: dict, multipart fields for requests.post(files=...)
        :return: torch.Tensor, embeddings decoded from the JSON response
        """
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        response.raise_for_status()
        return torch.tensor(json.loads(response.text))

    def preprocess_image(self, image_url):
        """
        Preprocess an image from a URL or a local file path.

        :param image_url: str, URL or path of the image to preprocess
        :return: torch.Tensor, preprocessed image of shape (1, C, H, W)
        """
        if os.path.isfile(image_url):
            input_image = Image.open(image_url).convert("RGB")
        else:
            response = requests.get(image_url)
            response.raise_for_status()
            input_image = Image.open(BytesIO(response.content)).convert("RGB")
        return self.preprocess(input_image).unsqueeze(0).cpu()

    def text_to_embedding(self, text):
        """
        Convert a given text to an embedding using the CLIP model server.

        :param text: str, text to convert to an embedding
        :return: torch.Tensor, text embeddings
        """
        payload = {
            "text": ("str", text, "application/octet-stream"),
        }
        return self._request_embedding(payload)

    def image_url_to_embedding(self, image_url):
        """
        Convert an image URL to an embedding using the CLIP model server.

        :param image_url: str, URL of the image to convert to an embedding
        :return: torch.Tensor, image embeddings
        """
        payload = {
            "image_url": ("str", image_url, "application/octet-stream"),
        }
        return self._request_embedding(payload)

    def preprocessed_image_to_embedding(self, image):
        """
        Convert a preprocessed image tensor to an embedding using the CLIP model server.

        The tensor is serialized as raw bytes together with its shape and dtype
        so the server can reconstruct it.

        :param image: torch.Tensor, preprocessed image
        :return: torch.Tensor, image embeddings
        """
        payload = {
            "preprocessed_image": ("tensor", image.numpy().tobytes(), "application/octet-stream"),
            "shape": ("shape", np.array(image.shape).tobytes(), "application/octet-stream"),
            "dtype": ("dtype", str(image.dtype).encode(), "application/octet-stream"),
        }
        return self._request_embedding(payload)
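

# --- Usage sketch ---
# A minimal end-to-end example, assuming an embedding server is already
# listening at HTTP_ADDRESS (default http://127.0.0.1:8000/) and that the
# sample URL below is reachable; both are placeholders, swap in your own.
if __name__ == "__main__":
    client = ClipAppClient()

    # Text embedding: the raw string is sent to the server, which tokenizes
    # and encodes it with the same CLIP model.
    text_emb = client.text_to_embedding("A beautiful landscape")
    print("text embedding shape:", text_emb.shape)

    # Image embedding from a URL: the server fetches and encodes the image.
    image_url = "https://example.com/image.jpg"  # placeholder URL
    image_emb = client.image_url_to_embedding(image_url)
    print("image embedding shape:", image_emb.shape)

    # Alternatively, preprocess locally and ship the tensor; this keeps
    # decoding/resizing on the client so the server only runs the encoder.
    prepro = client.preprocess_image(image_url)
    prepro_emb = client.preprocessed_image_to_embedding(prepro)
    print("preprocessed-image embedding shape:", prepro_emb.shape)

    # Cosine similarity between the text and image embeddings: CLIP scores
    # text-image pairs by the angle between their normalized embeddings.
    text_n = text_emb / text_emb.norm(dim=-1, keepdim=True)
    image_n = image_emb / image_emb.norm(dim=-1, keepdim=True)
    print("cosine similarity:", (text_n * image_n).sum(dim=-1).item())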