AmirShabani's picture
Sooti
26cc869
raw
history blame contribute delete
No virus
3.04 kB
import pickle
import numpy as np
import pandas as pd
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import BallTree
addr = './'
user_recomms_file = open(addr + 'user_recomms.pkl', "rb")
user_recomms = pickle.load(user_recomms_file)
user_recomms_file.close()
final_movies_file = open(addr + 'final_movies.pkl', "rb")
final_movies = pickle.load(final_movies_file)
final_movies_file.close()
movie_embeds_file = open(addr + 'movie_embeds.pkl', "rb")
movie_embeds = pickle.load(movie_embeds_file)
movie_embeds_file.close()
btree_file = open(addr + 'btree.pkl', "rb")
btree = pickle.load(btree_file)
btree_file.close()
user_embeds_file = open(addr + 'user_embeds.pkl', "rb")
user_embeds = pickle.load(user_embeds_file)
user_embeds_file.close()
user_mapping_file = open(addr + 'user_mapping.pkl', "rb")
user_mapping = pickle.load(user_mapping_file)
user_mapping_file.close()
movie_mapping_file = open(addr + 'movie_mapping.pkl', "rb")
movie_mapping = pickle.load(movie_mapping_file)
movie_mapping_file.close()
user_pos_items_file = open(addr + 'user_pos_items.pkl', "rb")
user_pos_items = pickle.load(user_pos_items_file)
user_pos_items_file.close()
def create_user_embedding(movie_ratings, movies_df):
# Convert the movie_ratings dictionary to a dataframe
user_ratings_df = pd.DataFrame.from_dict(movie_ratings, orient='index', columns=['rating'])
user_ratings_df['movieId'] = user_ratings_df.index
# Merge the user_ratings_df with the movies_df to get the movie embeddings
user_movie_embeddings = user_ratings_df.merge(movies_df, on='movieId', how='left')
print(user_ratings_df)
print(user_movie_embeddings)
# Multiply the ratings with the movie embeddings
user_movie_embeddings = user_movie_embeddings.iloc[:, 2:].values * user_movie_embeddings['rating'].values[:, np.newaxis]
# Calculate the user embedding as the sum of the movie embeddings
user_embedding = np.sum(user_movie_embeddings, axis=0)
np.nan_to_num(user_embedding, 0)
print(user_movie_embeddings.shape)
return user_embedding
def find_closest_user(user_embedding, tree, user_embeddings):
# Query the BallTree to find the closest user to the given user_embedding
_, closest_user_index = tree.query([user_embedding], k=1)
# Get the embedding of the closest user
closest_user_embedding = user_embeddings.iloc[closest_user_index[0][0]]
return closest_user_embedding
def output_list(movie_ratings, movies_df = movie_embeds, tree = btree, user_embeddings = user_embeds, movies = final_movies):
user_embed = create_user_embedding(movie_ratings, movie_embeds)
# Call the find_closest_user function with the pre-built BallTree
closest_user_embed = find_closest_user(user_embed, tree, user_embeds)
recomms = user_recomms[int(closest_user_embed['userId'])]
out = [movies['title'].iloc[movie_id] for movie_id in recomms]
return out
# output_list({1:1,2:2,3:3,4:4,5:5})