import json
import os
import numpy as np
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from PIL import Image
from io import BytesIO
import torch
from clip_retrieval.load_clip import load_clip, get_tokenizer
class ClipAppClient:
    """
    Client for generating CLIP embeddings via a remote embedding service.

    Text and images are posted to the HTTP endpoint given by the HTTP_ADDRESS
    environment variable (default http://127.0.0.1:8000/), while image
    preprocessing is performed locally with the model's CLIP transform.

    Example:
        app_client = ClipAppClient()
        test_image_url = "https://example.com/image.jpg"
        preprocessed_image = app_client.preprocess_image(test_image_url)

        text = "A beautiful landscape"
        text_embeddings = app_client.text_to_embedding(text)
        image_embeddings = app_client.image_url_to_embedding(test_image_url)
        preprocessed_image_embeddings = app_client.preprocessed_image_to_embedding(preprocessed_image)
    """

    def __init__(self, clip_model="ViT-L/14", device=None):
    # def __init__(self, clip_model="open_clip:ViT-H-14", device=None):
        self.clip_model = clip_model
        self.device = device or ("cuda:0" if torch.cuda.is_available() else "cpu")
        print("using device", self.device)
        # Only the preprocessing transform and tokenizer are needed locally;
        # the embedding model itself runs behind the HTTP service.
        _, self.preprocess = load_clip(clip_model, use_jit=True, device=self.device)
        self.tokenizer = get_tokenizer(clip_model)

    def preprocess_image(self, image_url):
        """
        Preprocess an image from a given URL or local file path.

        :param image_url: str, URL or local path of the image to preprocess
        :return: torch.Tensor, preprocessed image with a leading batch dimension
        """
        if os.path.isfile(image_url):
            input_image = Image.open(image_url).convert('RGB')
        else:
            response = requests.get(image_url)
            response.raise_for_status()
            input_image = Image.open(BytesIO(response.content)).convert('RGB')
        # Apply the CLIP preprocessing transform and add a batch dimension.
        prepro = self.preprocess(input_image).unsqueeze(0).cpu()
        return prepro

    def text_to_embedding(self, text):
        """
        Convert a given text to an embedding via the CLIP embedding service.

        :param text: str, text to convert to an embedding
        :return: torch.Tensor, text embedding
        """
        payload = {
            "text": ('str', text, 'application/octet-stream'),
        }
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        response.raise_for_status()
        # The service returns the embedding as a JSON-encoded nested list.
        embeddings = torch.tensor(response.json())
        return embeddings

    def image_url_to_embedding(self, image_url):
        """
        Convert an image URL to an embedding via the CLIP embedding service.

        :param image_url: str, URL of the image to convert to an embedding
        :return: torch.Tensor, image embedding
        """
        payload = {
            "image_url": ('str', image_url, 'application/octet-stream'),
        }
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        response.raise_for_status()
        embeddings = torch.tensor(response.json())
        return embeddings

    def preprocessed_image_to_embedding(self, image):
        """
        Convert a preprocessed image tensor to an embedding via the CLIP embedding service.

        :param image: torch.Tensor, preprocessed image (as returned by preprocess_image)
        :return: torch.Tensor, image embedding
        """
        # Serialize the tensor as raw bytes plus its shape and dtype so the
        # server can reconstruct it on the other side.
        key = "preprocessed_image"
        data_bytes = image.numpy().tobytes()
        shape_bytes = np.array(image.shape).tobytes()
        dtype_bytes = str(image.dtype).encode()
        payload = {
            key: ('tensor', data_bytes, 'application/octet-stream'),
            'shape': ('shape', shape_bytes, 'application/octet-stream'),
            'dtype': ('dtype', dtype_bytes, 'application/octet-stream'),
        }
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        response.raise_for_status()
        embeddings = torch.tensor(response.json())
        return embeddings
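

# A minimal usage sketch, assuming the companion embedding service is running
# at HTTP_ADDRESS (default http://127.0.0.1:8000/) and that the placeholder
# image URL below is replaced with a real, reachable image. It mirrors the
# docstring example and compares a text and an image embedding with cosine
# similarity; the variable names here are illustrative, not part of the API.
if __name__ == "__main__":
    app_client = ClipAppClient()
    test_image_url = "https://example.com/image.jpg"  # placeholder URL
    text = "A beautiful landscape"

    text_embedding = app_client.text_to_embedding(text)
    preprocessed_image = app_client.preprocess_image(test_image_url)
    image_embedding = app_client.preprocessed_image_to_embedding(preprocessed_image)

    # Cosine similarity between the flattened text and image embeddings.
    similarity = torch.nn.functional.cosine_similarity(
        text_embedding.flatten().unsqueeze(0),
        image_embedding.flatten().unsqueeze(0),
    )
    print("text/image cosine similarity:", similarity.item())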