from PIL import Image
import io
from transformers import CLIPProcessor, CLIPModel
import torch
# Load CLIP model and processor
model_name = "openai/clip-vit-base-patch32"
loaded_model = CLIPModel.from_pretrained(model_name)
loaded_processor = CLIPProcessor.from_pretrained(model_name)
def getTextEmbedding(text):
    """Return the CLIP text embedding for a string as a NumPy array."""
    # Tokenize the text for the CLIP text encoder
    inputs_text = loaded_processor(text=[text], return_tensors="pt", padding=True)
    # Forward pass through the model; no gradients are needed for inference
    with torch.no_grad():
        # Get the text features
        text_features = loaded_model.get_text_features(
            input_ids=inputs_text.input_ids,
            attention_mask=inputs_text.attention_mask,
        )
    # Drop the batch dimension and convert the tensor to a NumPy array
    text_embedding = text_features.squeeze().numpy()
    return text_embedding
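
# Usage sketch (the query string is an illustrative assumption): for this
# checkpoint the embedding is a 512-dimensional vector, e.g.
#   emb = getTextEmbedding("a photo of a dog")
#   emb.shape  # -> (512,)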
def getImageEmbedding(binary_image_data):
    """Return the CLIP image embedding for raw image bytes as a NumPy array."""
    # Decode the raw bytes and preprocess the image for the CLIP vision encoder
    image = Image.open(io.BytesIO(binary_image_data))
    inputs = loaded_processor(images=image, return_tensors="pt", padding=True)
    # Forward pass through the model; no gradients are needed for inference
    with torch.no_grad():
        # Get the image features
        image_features = loaded_model.get_image_features(pixel_values=inputs.pixel_values)
    # Drop the batch dimension and convert the tensor to a NumPy array
    image_embedding = image_features.squeeze().numpy()
    return image_embedding
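
# A minimal end-to-end sketch (assumptions: "example.jpg" is a hypothetical
# local image path; the query text is illustrative). It scores how well the
# image matches the text via cosine similarity between the two embeddings.
if __name__ == "__main__":
    import numpy as np

    text_embedding = getTextEmbedding("a photo of a dog")
    with open("example.jpg", "rb") as f:  # hypothetical placeholder path
        image_embedding = getImageEmbedding(f.read())

    # Cosine similarity: dot product of the two L2-normalized vectors
    similarity = np.dot(text_embedding, image_embedding) / (
        np.linalg.norm(text_embedding) * np.linalg.norm(image_embedding)
    )
    print(f"cosine similarity: {similarity:.4f}")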