|
import gradio as gr |
|
import numpy as np |
|
import pandas as pd |
|
from scipy.sparse import csr_matrix |
|
from sklearn.neighbors import NearestNeighbors |
|
|
|
|
|
def create_matrix(df):
    """Build a sparse movie-by-user rating matrix from a ratings table.

    Args:
        df: DataFrame with ``userId``, ``movieId`` and ``rating`` columns,
            one row per rating.

    Returns:
        A 5-tuple ``(X, user_mapper, movie_mapper, user_inv_mapper,
        movie_inv_mapper)`` where

        * ``X`` is a ``csr_matrix`` of shape ``(n_movies, n_users)``; row ``i``
          holds the ratings given to the movie at index ``i``.
        * ``user_mapper`` / ``movie_mapper`` map an id to its column / row
          index in ``X``.
        * ``user_inv_mapper`` / ``movie_inv_mapper`` map an index back to the
          original id.

        Indices are assigned in ascending id order.
    """
    # np.unique returns the sorted distinct ids and, via return_inverse, the
    # position of every row's id inside that sorted array. That inverse array
    # is exactly the matrix index for each rating, so no per-row dictionary
    # lookup (and no repeated np.unique call) is needed.
    user_ids, user_index = np.unique(df["userId"].to_numpy(), return_inverse=True)
    movie_ids, movie_index = np.unique(df["movieId"].to_numpy(), return_inverse=True)

    n_users = len(user_ids)
    n_movies = len(movie_ids)

    user_mapper = dict(zip(user_ids, range(n_users)))
    movie_mapper = dict(zip(movie_ids, range(n_movies)))

    user_inv_mapper = dict(enumerate(user_ids))
    movie_inv_mapper = dict(enumerate(movie_ids))

    # Movies are rows and users are columns so that each row can be used
    # directly as a movie feature vector.
    X = csr_matrix(
        (df["rating"].to_numpy(), (movie_index, user_index)),
        shape=(n_movies, n_users),
    )
    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper
|
|
|
|
|
def find_similar_movies(movie_id, X, k, metric='cosine', show_distance=False):
    """Return the ``k`` movies whose rating vectors are nearest to ``movie_id``.

    The module-level ``movie_mapper`` and ``movie_inv_mapper`` dictionaries are
    used to translate between movie ids and row indices of ``X``.

    Args:
        movie_id: Id of the query movie; must be a key of ``movie_mapper``.
        X: Matrix with one row per movie (as produced by ``create_matrix``).
        k: Number of similar movies to return. Fewer are returned when ``X``
            has fewer than ``k + 1`` rows.
        metric: Distance metric forwarded to ``NearestNeighbors``.
        show_distance: When true, return ``(movie_id, distance)`` pairs
            instead of bare ids.

    Returns:
        A list of movie ids ordered from nearest to farthest, or a list of
        ``(movie_id, distance)`` tuples when ``show_distance`` is true. The
        query movie itself is never included.

    Raises:
        KeyError: If ``movie_id`` is not present in ``movie_mapper``.
    """
    movie_ind = movie_mapper[movie_id]

    # The query movie is itself a row of X, so request one extra neighbour --
    # but never more than there are rows, which NearestNeighbors rejects.
    n_neighbors = min(k + 1, X.shape[0])

    knn = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    knn.fit(X)

    movie_vec = X[movie_ind].reshape(1, -1)
    # Always fetch distances: kneighbors returns a (distances, indices) tuple
    # in that mode, and handling a single return shape avoids the crash the
    # index-only code path had when show_distance was true.
    distances, indices = knn.kneighbors(movie_vec, return_distance=True)

    # Drop the query movie by index rather than by position: when another
    # movie has an identical vector, the query is not guaranteed to be first.
    neighbours = [
        (movie_inv_mapper[int(idx)], float(dist))
        for idx, dist in zip(indices[0], distances[0])
        if int(idx) != movie_ind
    ][:k]

    if show_distance:
        return neighbours
    return [neighbour_id for neighbour_id, _ in neighbours]
|
|
|
|
|
def recommend_movies(movie_name):
    """Return newline-separated titles of movies similar to ``movie_name``.

    The title lookup is a case-insensitive substring match against the
    module-level ``movie_titles`` mapping. The first matching movie that has a
    row in the rating matrix (i.e. is a key of ``movie_mapper``) is used as
    the query for ``find_similar_movies``.

    Args:
        movie_name: Full or partial movie title typed by the user.

    Returns:
        A string with one recommended title per line, or a human-readable
        message when no usable movie could be found.
    """
    not_found = "Movie not found. Please check the spelling and try again"

    # An empty string is a substring of every title, so a blank query would
    # otherwise silently "match" the first movie in the catalogue.
    query = (movie_name or "").strip().lower()
    if not query:
        return not_found

    matches = [
        candidate_id
        for candidate_id, title in movie_titles.items()
        if query in title.lower()
    ]
    if not matches:
        return not_found

    # A title can exist in the catalogue without any ratings; such a movie has
    # no row in X and would raise KeyError inside find_similar_movies.
    movie_id = next((m for m in matches if m in movie_mapper), None)
    if movie_id is None:
        return "No ratings are available for that movie, so no recommendations can be made"

    similar_ids = find_similar_movies(movie_id, X, k=10)
    # Skip ids that have ratings but no catalogue entry instead of crashing.
    return "\n".join(movie_titles[i] for i in similar_ids if i in movie_titles)
|
|
|
|
|
|
|
# --- Data loading -----------------------------------------------------------
# Both CSV files are read from the current working directory at import time.
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")

# --- Summary statistics -----------------------------------------------------
# NOTE(review): none of the values in this section are referenced by the
# functions or UI code visible in this file; they look like exploratory
# leftovers. Kept because other code may read these module-level names.
n_ratings = len(ratings)
n_movies = len(ratings['movieId'].unique())
n_users = len(ratings['userId'].unique())

# Number of ratings submitted by each user.
user_freq = ratings[['userId', 'movieId']].groupby('userId').count().reset_index()
user_freq.columns = ['userId', 'n_ratings']

# Mean rating per movie, and the movie ids with the lowest / highest mean.
mean_rating = ratings.groupby('movieId')[['rating']].mean()
lowest_rated = mean_rating['rating'].idxmin()
highest_rated = mean_rating['rating'].idxmax()

# Rating count and mean per movie; droplevel() flattens the two-level column
# index produced by agg() down to plain 'count' / 'mean' columns.
movie_stats = ratings.groupby('movieId')[['rating']].agg(['count', 'mean'])
movie_stats.columns = movie_stats.columns.droplevel()

# --- Model inputs -----------------------------------------------------------
# Sparse movie-by-user matrix plus the id <-> index lookup tables that
# find_similar_movies reads as module-level globals.
X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_matrix(ratings)

# movieId -> title, used both for searching by name and for rendering results.
movie_titles = dict(zip(movies['movieId'], movies['title']))
|
|
|
|
|
# --- Gradio UI --------------------------------------------------------------
# Use the top-level gr.Textbox component: the gr.inputs / gr.outputs
# namespaces are deprecated and absent from newer Gradio releases, whereas
# gr.Textbox is available in every release that also provides gr.themes.
movie_name = gr.Textbox(label="Movie Name")
outputs = gr.Textbox(label="Recommended Movies")

# One text box in, one text box out; recommend_movies does the lookup.
iface = gr.Interface(
    fn=recommend_movies,
    inputs=movie_name,
    outputs=outputs,
    theme=gr.themes.Default(primary_hue="slate"),
)

# Starts the web server as soon as the script is executed.
iface.launch()
|
|