"""Load models, data and a search index, and expose `get_recommendations`."""

import os

import numpy as np
import torch
from datasets import load_dataset, concatenate_datasets
from sentence_transformers import SentenceTransformer
from torchvision import transforms

from models.encoder import Encoder
from indexer import Indexer

# Text embedding model used to encode titles at query time.
model = SentenceTransformer('intfloat/multilingual-e5-base')

# Project image encoder; weights are loaded onto the CPU.
# NOTE(review): the encoder is left in its default (training) mode. If it
# contains dropout/batch-norm layers, `encoder.eval()` is probably wanted --
# confirm against how `latent_data_final.bin` was produced.
encoder = Encoder()
encoder.load_state_dict(
    torch.load('./models/encoder.bin', map_location=torch.device('cpu'))
)

# Train and test splits are merged into one dataset; candidate ids returned
# by the index are used as row positions in this merged dataset.
dataset = load_dataset(
    "Ransaka/youtube_recommendation_data", token=os.environ.get('HF')
)
dataset = concatenate_datasets([dataset['train'], dataset['test']])

# Precomputed vectors loaded from disk (presumably image latents and title
# embeddings respectively -- verify against the script that wrote them).
latent_data = torch.load("data/latent_data_final.bin")
embeddings = torch.load("data/embeddings.bin")


def row_wise_normalize_and_concatenate(array1, array2):
    """Scale every row of both arrays to unit L2 norm and stack them.

    Args:
        array1: 2-D array-like; each row is divided by its own L2 norm.
        array2: 2-D array-like; each row is divided by its own L2 norm.

    Returns:
        The two normalized arrays concatenated along axis 0 (rows of
        ``array2`` follow rows of ``array1``).
    """
    normalized_array1 = array1 / np.linalg.norm(array1, axis=1, keepdims=True)
    normalized_array2 = array2 / np.linalg.norm(array2, axis=1, keepdims=True)
    # NOTE(review): axis=0 stacks rows, so the result has
    # len(array1) + len(array2) rows. If the intent is one combined vector
    # per item, axis=1 would be needed -- confirm with the index consumer.
    return np.concatenate((normalized_array1, normalized_array2), axis=0)


result_array = row_wise_normalize_and_concatenate(latent_data, embeddings)
index = Indexer(result_array)


def get_recommendations(image, title: str, k: int) -> dict:
    """Return images and titles of the top-``k`` matches for a query.

    The title is embedded with the sentence-transformer model, the image is
    converted to single-channel ("L") mode, turned into a tensor and passed
    through the encoder; both embeddings are concatenated along axis 0 and
    used to query the module-level ``index``.

    Args:
        image: Image object exposing ``convert`` (presumably a PIL image).
        title: Query title text.
        k: Number of neighbours requested from the index.

    Returns:
        ``{"image": [...], "title": [...]}`` with one entry per candidate,
        in the order returned by the index.
    """
    # Use the caller-supplied title; the previous code overwrote it via an
    # undefined `product_id` lookup, which raised NameError.
    title_embeds = model.encode([title], normalize_embeddings=True)

    image_tensor = transforms.ToTensor()(image.convert("L"))
    image_embeds = encoder(image_tensor).detach().numpy()

    # NOTE(review): axis=0 requires both embeddings to share their trailing
    # dimension; kept to mirror `row_wise_normalize_and_concatenate`.
    final_embeds = np.concatenate((image_embeds, title_embeds), axis=0)

    # `index` is the only index built in this module; the previous name
    # `image_embedding_index` was never defined.
    candidates = index.topk(final_embeds, k=k)
    final_candidates = list(candidates[0])

    results_dict = {"image": [], "title": []}
    for candidate in final_candidates:
        # One row lookup per candidate instead of re-reading whole columns.
        row = dataset[int(candidate)]
        results_dict['image'].append(row['image'])
        results_dict['title'].append(row['title'])
    return results_dict