from PIL import Image
import io
from transformers import CLIPProcessor, CLIPModel
import torch
# Load CLIP model and processor
model_name = "openai/clip-vit-base-patch32"
loaded_model = CLIPModel.from_pretrained(model_name)
loaded_processor = CLIPProcessor.from_pretrained(model_name)
def getTextEmbedding(text):
    """Return the CLIP text embedding for a string as a NumPy array."""
    # Tokenize the text for the CLIP text encoder
    inputs_text = loaded_processor(text=[text], return_tensors="pt", padding=True)
    # Forward pass through the model; no gradients are needed for inference
    with torch.no_grad():
        # Get the text features
        text_features = loaded_model.get_text_features(
            input_ids=inputs_text.input_ids,
            attention_mask=inputs_text.attention_mask,
        )
    # Drop the batch dimension and convert the tensor to a NumPy array
    text_embedding = text_features.squeeze().numpy()
    return text_embedding
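
# Usage sketch (the query string is an illustrative assumption): for this
# checkpoint the embedding is a 512-dimensional vector, e.g.
#   emb = getTextEmbedding("a photo of a dog")
#   emb.shape  # -> (512,)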
def getImageEmbedding(binary_image_data):
    """Return the CLIP image embedding for raw image bytes as a NumPy array."""
    # Decode the raw bytes and preprocess the image for the CLIP vision encoder
    image = Image.open(io.BytesIO(binary_image_data))
    inputs = loaded_processor(images=image, return_tensors="pt", padding=True)
    # Forward pass through the model; no gradients are needed for inference
    with torch.no_grad():
        # Get the image features
        image_features = loaded_model.get_image_features(pixel_values=inputs.pixel_values)
    # Drop the batch dimension and convert the tensor to a NumPy array
    image_embedding = image_features.squeeze().numpy()
    return image_embedding
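
# A minimal end-to-end sketch (assumptions: "example.jpg" is a hypothetical
# local image path; the query text is illustrative). It scores how well the
# image matches the text via cosine similarity between the two embeddings.
if __name__ == "__main__":
    import numpy as np

    text_embedding = getTextEmbedding("a photo of a dog")
    with open("example.jpg", "rb") as f:  # hypothetical placeholder path
        image_embedding = getImageEmbedding(f.read())

    # Cosine similarity: dot product of the two L2-normalized vectors
    similarity = np.dot(text_embedding, image_embedding) / (
        np.linalg.norm(text_embedding) * np.linalg.norm(image_embedding)
    )
    print(f"cosine similarity: {similarity:.4f}")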