|
from typing import List |
|
import numpy as np |
|
import torch |
|
import ray |
|
from ray import serve |
|
from PIL import Image |
|
from clip_retrieval.load_clip import load_clip, get_tokenizer |
|
|
|
|
|
@serve.deployment(num_replicas=6, ray_actor_options={"num_cpus": .2, "num_gpus": 0.1})
class CLIPTransform:
    """Ray Serve deployment that produces unit-norm CLIP embeddings.

    The deployment is declared with 6 replicas, each reserving 0.2 CPU and
    0.1 GPU. Every replica loads its own copy of the CLIP model in
    ``__init__`` and exposes three methods:

    * ``text_to_embeddings`` -- batched text encoding (via ``serve.batch``).
    * ``image_to_embeddings`` -- encode a single raw image array.
    * ``preprocessed_image_to_emdeddings`` -- encode an already
      preprocessed image tensor.
    """

    def __init__(self):
        """Load the CLIP model, its image preprocessing and its tokenizer."""
        # Use the first CUDA device when one is visible, else fall back to CPU.
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self._clip_model = "ViT-L/14"
        # NOTE(review): not read anywhere in this class; kept because other
        # code may inspect the attribute -- confirm before removing.
        self._clip_model_id = "laion5B-L-14"
        self.model, self.preprocess = load_clip(
            self._clip_model, use_jit=True, device=self.device
        )
        self.tokenizer = get_tokenizer(self._clip_model)

        print("using device", self.device)

    # ``serve.batch`` only accepts coroutine functions; decorating a plain
    # ``def`` raises ``TypeError`` when the class body is executed, so this
    # handler has to be ``async def``. Callers still submit ONE prompt per
    # request; Serve groups up to 32 of them into the ``prompts`` list.
    @serve.batch(max_batch_size=32)
    async def text_to_embeddings(self, prompts: List[str]) -> List[List[float]]:
        """Encode a batch of prompts into unit-norm CLIP text embeddings.

        Args:
            prompts: The prompts collected by ``serve.batch`` for this call.

        Returns:
            One embedding per prompt, in the same order as ``prompts``, each
            converted to a plain Python list of floats (``.tolist()``).
        """
        tokens = self.tokenizer(prompts).to(self.device)
        with torch.no_grad():
            text_features = self.model.encode_text(tokens)
            # Scale each row to unit L2 norm along the feature axis.
            text_features /= text_features.norm(dim=-1, keepdim=True)
        return text_features.cpu().numpy().tolist()

    def image_to_embeddings(self, input_im):
        """Encode a single raw image into a unit-norm CLIP image embedding.

        Args:
            input_im: Array accepted by ``PIL.Image.fromarray``.

        Returns:
            The normalised output of ``model.encode_image`` for a batch of
            one image. It is returned as-is (not moved to CPU or converted).
        """
        pil_image = Image.fromarray(input_im)
        # ``unsqueeze(0)`` adds the leading batch dimension before the tensor
        # is moved to the model's device.
        batch = self.preprocess(pil_image).unsqueeze(0).to(self.device)
        return self.preprocessed_image_to_emdeddings(batch)

    def preprocessed_image_to_emdeddings(self, prepro):
        """Encode an already preprocessed image tensor and normalise it.

        Args:
            prepro: Input passed straight to ``model.encode_image``. This
                method does not move it to ``self.device``; the caller is
                expected to have done so.

        Returns:
            The output of ``model.encode_image`` divided by its L2 norm
            along the last dimension.
        """
        with torch.no_grad():
            image_features = self.model.encode_image(prepro)
            image_features /= image_features.norm(dim=-1, keepdim=True)
        return image_features
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Bind the CLIPTransform deployment with no constructor arguments.
# NOTE(review): presumably this is the application object handed to
# `serve run` / `serve.run` -- confirm against the launch configuration.
deployment_graph = CLIPTransform.bind()
|
|