"""Image-similarity lookup: VGG-16 feature extraction plus Annoy search.

Importing this module loads a pretrained VGG-16, wraps it in
``FeatureExtractor`` and moves it to the GPU when one is available.
"""
import json
from typing import Tuple

import annoy
import torch
from torch import nn
from torchvision import models, transforms
import cv2
import PIL
import PIL.Image  # `import PIL` alone does not guarantee PIL.Image is loaded

EMBEDDING_DIMENSION = 4096
ANNOY_INDEX_FILE = "models/index.ann"
ANNOY_MAPPING_FILE = "models/mappings.json"
IMG_RESIZE_SIZE = 224


def load_annoy_index(
    index_file=ANNOY_INDEX_FILE,
    mapping_file=ANNOY_MAPPING_FILE,
) -> Tuple[annoy.AnnoyIndex, dict]:
    """Load the Annoy index and its associated JSON mapping file.

    Args:
        index_file: Path of the saved Annoy index.
        mapping_file: Path of the JSON file loaded as the mapping.

    Returns:
        ``(annoy_index, mappings)``. ``mappings`` is the parsed JSON object;
        its keys are strings (JSON object keys always are) and are returned
        unconverted, which is what this function has always returned.
    """
    annoy_index = annoy.AnnoyIndex(f=EMBEDDING_DIMENSION, metric='angular')
    annoy_index.load(index_file)

    # Read the mapping from the path the caller asked for. Previously the
    # returned dict always came from ANNOY_MAPPING_FILE, so a custom
    # `mapping_file` was silently ignored.
    with open(mapping_file, encoding="utf-8") as f:
        mappings = json.load(f)

    return annoy_index, mappings


# Preprocessing applied to every image before it is fed to the model:
# array -> PIL image -> fixed square resize -> float tensor.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_RESIZE_SIZE, IMG_RESIZE_SIZE)),
    transforms.ToTensor(),
])


class FeatureExtractor(nn.Module):
    """Run a VGG-16 up to and including its first fully-connected layer."""

    def __init__(self, model):
        """Reuse the layers of ``model``.

        Args:
            model: A network exposing ``features``, ``avgpool`` and
                ``classifier`` (here: torchvision's VGG-16).
        """
        super().__init__()
        # Convolutional feature layers
        self.features = nn.Sequential(*model.features)
        # Average pooling layer
        self.pooling = model.avgpool
        # Flattens the pooled feature maps into one vector per sample
        self.flatten = nn.Flatten()
        # First layer of the fully-connected classifier
        self.fc = model.classifier[0]

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the output of the first classifier layer for batch ``x``."""
        out = self.features(x)
        out = self.pooling(out)
        out = self.flatten(out)
        out = self.fc(out)
        return out


# Initialize the model.
# NOTE: newer torchvision releases spell this
# `models.vgg16(weights=VGG16_Weights.DEFAULT)`; `pretrained=True` is kept so
# the module keeps working with the torchvision version it was written for.
model = models.vgg16(pretrained=True)
new_model = FeatureExtractor(model)

# Use the GPU when available, otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
new_model = new_model.to(device)
# The extractor is only ever used for inference.
new_model.eval()


def analyze_image(
    image,
    annoy_index,
    n_matches: int = 1,
    num_jitters: int = 1,
    model: str = "large"
):
    """Embed ``image`` and look up its nearest neighbours in ``annoy_index``.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``
            (presumably an HxWx3 uint8 RGB array -- confirm against caller).
        annoy_index: Index queried with ``get_nns_by_vector``.
        n_matches: Number of neighbours to request.
        num_jitters: Unused; kept so existing callers keep working.
        model: Unused; kept so existing callers keep working.

    Returns:
        The result of ``annoy_index.get_nns_by_vector(v, n_matches,
        include_distances=True)``.

    Side effects:
        Writes ``input_img.png`` into the current working directory.
    """
    # Round-trip through a PNG on disk so the image is decoded by OpenCV.
    # cv2.imread returns colour images in BGR channel order, so the model
    # sees the channels swapped relative to `image`; this is kept as-is
    # (presumably the index was built through the same path -- TODO confirm).
    # NOTE(review): the filename is fixed, so concurrent calls would
    # overwrite each other's file.
    PIL.Image.fromarray(image).save("input_img.png")
    img = cv2.imread("input_img.png")

    # Resize and convert to a tensor
    img = transform(img)
    # The model expects a 4-dimensional batch:
    # [batch_size, channels, height, width]
    img = img.reshape(1, 3, IMG_RESIZE_SIZE, IMG_RESIZE_SIZE)
    img = img.to(device)

    # Feature extraction only, so no gradients are needed
    with torch.no_grad():
        feature = new_model(img)

    # Flatten to a 1-D NumPy vector for the Annoy query
    v = feature.cpu().detach().numpy().reshape(-1)

    results = annoy_index.get_nns_by_vector(v, n_matches, include_distances=True)
    return results