Spaces:

nakamura196
/

ann-kunshujo

Runtime error

File size: 3,049 Bytes

import annoy
from typing import Tuple
import json

# Length of every vector stored in the Annoy index (passed as `f` to AnnoyIndex).
# NOTE(review): presumably equals the feature extractor's output width — confirm.
EMBEDDING_DIMENSION = 4096
# Default path of the saved Annoy index file.
ANNOY_INDEX_FILE = "models/index.ann"
# Default path of the JSON mapping file loaded together with the index.
ANNOY_MAPPING_FILE = "models/mappings.json"
# Side length (in pixels) of the square that input images are resized to.
IMG_RESIZE_SIZE = 224

####

def load_annoy_index(
    index_file=ANNOY_INDEX_FILE,
    mapping_file=ANNOY_MAPPING_FILE,
) -> Tuple[annoy.AnnoyIndex, dict]:
    """Load the Annoy index and the JSON mapping file that accompanies it.

    Args:
        index_file: Path of the saved Annoy index.
        mapping_file: Path of the JSON mapping file.

    Returns:
        A ``(annoy_index, mappings)`` tuple. ``mappings`` is the parsed JSON
        content returned unchanged, so its keys are the strings produced by
        the JSON decoder (they are not converted to ``int``).

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the mapping file is not valid JSON.
    """
    annoy_index = annoy.AnnoyIndex(f=EMBEDDING_DIMENSION, metric="angular")
    annoy_index.load(index_file)

    # Read the mapping from the path the caller supplied. Previously the
    # returned data always came from the module-level default path, so a
    # custom ``mapping_file`` was silently ignored, and the file was parsed
    # a second time into a dict that was thrown away.
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(mapping_file, encoding="utf-8") as f:
        mappings = json.load(f)

    return annoy_index, mappings

###

import torch
from torch import nn
from torchvision import models, transforms
# from torchvision.models.vgg import VGG16_Weights


# Preprocessing pipeline applied to every image before it reaches the model:
# array -> PIL image -> fixed-size square -> tensor.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize((IMG_RESIZE_SIZE, IMG_RESIZE_SIZE)),
        transforms.ToTensor(),
    ]
)

import cv2

class FeatureExtractor(nn.Module):
    """Wrap a VGG-16-style model so that it outputs a feature vector.

    The wrapped ``model`` must expose ``features`` (an iterable of layers),
    ``avgpool`` and ``classifier`` (indexable); only ``classifier[0]`` is used.
    """

    def __init__(self, model):
        super().__init__()
        # Convolutional feature layers, re-wrapped in a fresh Sequential.
        self.features = nn.Sequential(*model.features)
        # Average-pooling layer taken from the wrapped model.
        self.pooling = model.avgpool
        # Collapses each pooled feature map into a one-dimensional vector.
        self.flatten = nn.Flatten()
        # First layer of the wrapped model's fully-connected classifier.
        self.fc = model.classifier[0]

    def forward(self, x):
        """Return the feature vector(s) computed for the input batch ``x``."""
        pooled = self.pooling(self.features(x))
        return self.fc(self.flatten(pooled))

# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Pretrained VGG-16 backbone, wrapped so that it emits feature vectors.
# NOTE(review): `pretrained=True` is the legacy torchvision argument; newer
# releases prefer `weights=VGG16_Weights.DEFAULT` — confirm the pinned version
# before switching.
model = models.vgg16(pretrained=True)
new_model = FeatureExtractor(model).to(device)

import PIL

def analyze_image(
    image, annoy_index, n_matches: int = 1, num_jitters: int = 1, model: str = "large"
):
    """Find the nearest neighbours of ``image`` in ``annoy_index``.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``.
            NOTE(review): presumably an RGB uint8 array from the UI — confirm.
        annoy_index: Index object providing ``get_nns_by_vector``.
        n_matches: Number of neighbours to request.
        num_jitters: Unused; kept so existing callers keep working.
        model: Unused; kept so existing callers keep working.

    Returns:
        The value of ``annoy_index.get_nns_by_vector(..., include_distances=True)``;
        for Annoy this is a ``(ids, distances)`` pair of lists.
    """
    # Function-scope imports keep this block self-contained. ``import PIL``
    # alone does not guarantee that the ``PIL.Image`` submodule is loaded.
    import io

    import numpy as np
    from PIL import Image

    # Encode to PNG with Pillow and decode with OpenCV, exactly as the former
    # save()/cv2.imread() pair did, but in memory. This avoids a fixed
    # "input_img.png" in the working directory, which concurrent calls could
    # overwrite and which fails on a read-only filesystem. The decoded pixel
    # data (including OpenCV's 3-channel BGR layout) is unchanged.
    buffer = io.BytesIO()
    Image.fromarray(image).save(buffer, format="PNG")
    encoded = np.frombuffer(buffer.getvalue(), dtype=np.uint8)
    img = cv2.imdecode(encoded, cv2.IMREAD_COLOR)

    # Resize and convert to a tensor.
    img = transform(img)
    # Add the batch axis: the model expects [batch_size, channels, height, width].
    img = img.reshape(1, 3, IMG_RESIZE_SIZE, IMG_RESIZE_SIZE)
    img = img.to(device)

    # Inference only, so gradient tracking is not needed.
    with torch.no_grad():
        feature = new_model(img)

    # Flatten to a 1-D NumPy vector for the index query.
    v = feature.cpu().detach().numpy().reshape(-1)

    return annoy_index.get_nns_by_vector(v, n_matches, include_distances=True)