import json
import os
import numpy as np
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from PIL import Image
from io import BytesIO
import torch
from clip_retrieval.load_clip import load_clip, get_tokenizer
class ClipAppClient:
    """
    Client for generating CLIP embeddings via a remote embedding service.

    Text and images are posted to the HTTP endpoint given by the HTTP_ADDRESS
    environment variable (default http://127.0.0.1:8000/), while image
    preprocessing is performed locally with the model's CLIP transform.

    Example:
        app_client = ClipAppClient()
        test_image_url = "https://example.com/image.jpg"
        preprocessed_image = app_client.preprocess_image(test_image_url)

        text = "A beautiful landscape"
        text_embeddings = app_client.text_to_embedding(text)
        image_embeddings = app_client.image_url_to_embedding(test_image_url)
        preprocessed_image_embeddings = app_client.preprocessed_image_to_embedding(preprocessed_image)
    """

    def __init__(self, clip_model="ViT-L/14", device=None):
    # def __init__(self, clip_model="open_clip:ViT-H-14", device=None):
        self.clip_model = clip_model
        self.device = device or ("cuda:0" if torch.cuda.is_available() else "cpu")
        print("using device", self.device)
        # Only the preprocessing transform and tokenizer are needed locally;
        # the embedding model itself runs behind the HTTP service.
        _, self.preprocess = load_clip(clip_model, use_jit=True, device=self.device)
        self.tokenizer = get_tokenizer(clip_model)

    def preprocess_image(self, image_url):
        """
        Preprocess an image from a given URL or local file path.

        :param image_url: str, URL or local path of the image to preprocess
        :return: torch.Tensor, preprocessed image with a leading batch dimension
        """
        if os.path.isfile(image_url):
            input_image = Image.open(image_url).convert('RGB')
        else:
            response = requests.get(image_url)
            response.raise_for_status()
            input_image = Image.open(BytesIO(response.content)).convert('RGB')
        # Apply the CLIP preprocessing transform and add a batch dimension.
        prepro = self.preprocess(input_image).unsqueeze(0).cpu()
        return prepro

    def text_to_embedding(self, text):
        """
        Convert a given text to an embedding via the CLIP embedding service.

        :param text: str, text to convert to an embedding
        :return: torch.Tensor, text embedding
        """
        payload = {
            "text": ('str', text, 'application/octet-stream'),
        }
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        response.raise_for_status()
        # The service returns the embedding as a JSON-encoded nested list.
        embeddings = torch.tensor(response.json())
        return embeddings

    def image_url_to_embedding(self, image_url):
        """
        Convert an image URL to an embedding via the CLIP embedding service.

        :param image_url: str, URL of the image to convert to an embedding
        :return: torch.Tensor, image embedding
        """
        payload = {
            "image_url": ('str', image_url, 'application/octet-stream'),
        }
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        response.raise_for_status()
        embeddings = torch.tensor(response.json())
        return embeddings

    def preprocessed_image_to_embedding(self, image):
        """
        Convert a preprocessed image tensor to an embedding via the CLIP embedding service.

        :param image: torch.Tensor, preprocessed image (as returned by preprocess_image)
        :return: torch.Tensor, image embedding
        """
        # Serialize the tensor as raw bytes plus its shape and dtype so the
        # server can reconstruct it on the other side.
        key = "preprocessed_image"
        data_bytes = image.numpy().tobytes()
        shape_bytes = np.array(image.shape).tobytes()
        dtype_bytes = str(image.dtype).encode()
        payload = {
            key: ('tensor', data_bytes, 'application/octet-stream'),
            'shape': ('shape', shape_bytes, 'application/octet-stream'),
            'dtype': ('dtype', dtype_bytes, 'application/octet-stream'),
        }
        url = os.environ.get("HTTP_ADDRESS", "http://127.0.0.1:8000/")
        response = requests.post(url, files=payload)
        response.raise_for_status()
        embeddings = torch.tensor(response.json())
        return embeddings
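

# A minimal usage sketch, assuming the companion embedding service is running
# at HTTP_ADDRESS (default http://127.0.0.1:8000/) and that the placeholder
# image URL below is replaced with a real, reachable image. It mirrors the
# docstring example and compares a text and an image embedding with cosine
# similarity; the variable names here are illustrative, not part of the API.
if __name__ == "__main__":
    app_client = ClipAppClient()
    test_image_url = "https://example.com/image.jpg"  # placeholder URL
    text = "A beautiful landscape"

    text_embedding = app_client.text_to_embedding(text)
    preprocessed_image = app_client.preprocess_image(test_image_url)
    image_embedding = app_client.preprocessed_image_to_embedding(preprocessed_image)

    # Cosine similarity between the flattened text and image embeddings.
    similarity = torch.nn.functional.cosine_similarity(
        text_embedding.flatten().unsqueeze(0),
        image_embedding.flatten().unsqueeze(0),
    )
    print("text/image cosine similarity:", similarity.item())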